llvm-project/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
Ricardo Jesus 15fbdc2b96
[AArch64][SVE] Lower unpredicated loads/stores as LDR/STR. (#127837)
Currently, given:
```cpp
svuint8_t foo(uint8_t *x) {
  return svld1(svptrue_b8(), x);
}
```
We generate:
```gas
foo:
  ptrue   p0.b
  ld1b    { z0.b }, p0/z, [x0]
  ret
```
However, on little-endian and with unaligned memory accesses allowed, we
could instead be using LDR as follows:
```gas
foo:
  ldr     z0, [x0]
  ret
```

The second form avoids the predicate dependency.
Likewise for other types and stores.
2025-02-26 13:56:35 +00:00

111 lines
4.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple arm64-- | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; Test the (concat_vectors (trunc), (trunc)) pattern.
define <4 x i16> @test_concat_truncate_v2i64_to_v4i16(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%at = trunc <2 x i64> %a to <2 x i16>
%bt = trunc <2 x i64> %b to <2 x i16>
%shuffle = shufflevector <2 x i16> %at, <2 x i16> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %shuffle
}
define <4 x i32> @test_concat_truncate_v2i64_to_v4i32(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
entry:
%at = trunc <2 x i64> %a to <2 x i32>
%bt = trunc <2 x i64> %b to <2 x i32>
%shuffle = shufflevector <2 x i32> %at, <2 x i32> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %shuffle
}
define <4 x i16> @test_concat_truncate_v2i32_to_v4i16(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v2i32_to_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
entry:
%at = trunc <2 x i32> %a to <2 x i16>
%bt = trunc <2 x i32> %b to <2 x i16>
%shuffle = shufflevector <2 x i16> %at, <2 x i16> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %shuffle
}
define <8 x i8> @test_concat_truncate_v4i32_to_v8i8(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
entry:
%at = trunc <4 x i32> %a to <4 x i8>
%bt = trunc <4 x i32> %b to <4 x i8>
%shuffle = shufflevector <4 x i8> %at, <4 x i8> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %shuffle
}
define <8 x i16> @test_concat_truncate_v4i32_to_v8i16(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
%at = trunc <4 x i32> %a to <4 x i16>
%bt = trunc <4 x i32> %b to <4 x i16>
%shuffle = shufflevector <4 x i16> %at, <4 x i16> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %shuffle
}
define <8 x i8> @test_concat_truncate_v4i16_to_v8i8(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v4i16_to_v8i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
entry:
%at = trunc <4 x i16> %a to <4 x i8>
%bt = trunc <4 x i16> %b to <4 x i8>
%shuffle = shufflevector <4 x i8> %at, <4 x i8> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %shuffle
}
define <16 x i8> @test_concat_truncate_v8i16_to_v16i8(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v8i16_to_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
%at = trunc <8 x i16> %a to <8 x i8>
%bt = trunc <8 x i16> %b to <8 x i8>
%shuffle = shufflevector <8 x i8> %at, <8 x i8> %bt, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %shuffle
}
; The concat_vectors operation in this test is introduced when splitting
; the fptrunc operation due to the split <vscale x 4 x double> input operand.
define void @test_concat_fptrunc_v4f64_to_v4f32(ptr %ptr) #1 {
; CHECK-LABEL: test_concat_fptrunc_v4f64_to_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov z0.s, #1.00000000
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
entry:
%0 = shufflevector <vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
%1 = fptrunc <vscale x 4 x double> %0 to <vscale x 4 x float>
store <vscale x 4 x float> %1, ptr %ptr, align 4
ret void
}
attributes #0 = { nounwind }
attributes #1 = { "target-features"="+sve" }