llvm-project/llvm/test/CodeGen/AArch64/sve-extload-icmp.ll
Ricardo Jesus 15fbdc2b96
[AArch64][SVE] Lower unpredicated loads/stores as LDR/STR. (#127837)
Currently, given:
```cpp
svuint8_t foo(uint8_t *x) {
  return svld1(svptrue_b8(), x);
}
```
We generate:
```gas
foo:
  ptrue   p0.b
  ld1b    { z0.b }, p0/z, [x0]
  ret
```
However, on little-endian and with unaligned memory accesses allowed, we
could instead be using LDR as follows:
```gas
foo:
  ldr     z0, [x0]
  ret
```

The second form avoids the predicate dependency.
Likewise for other types and stores.
2025-02-26 13:56:35 +00:00

100 lines
3.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
; Unpacked nxv8i8: the i8 elements occupy .h lanes, so the load is an
; extending load and must keep the predicated LD1B form; the unpredicated
; LDR rewrite applies only to full-width register loads (see nxv16i8 below).
define <vscale x 8 x i8> @extload_icmp_nxv8i8(ptr %in) #0 {
; CHECK-LABEL: extload_icmp_nxv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: cnot z0.h, p0/m, z0.h
; CHECK-NEXT: ret
  ; load, compare-eq against zero, then widen the i1 result back to i8:
  ; this folds to a single CNOT in the generated code.
%ld = load <vscale x 8 x i8>, ptr %in
%cmp = icmp eq <vscale x 8 x i8> %ld, zeroinitializer
%ex = zext <vscale x 8 x i1> %cmp to <vscale x 8 x i8>
ret <vscale x 8 x i8> %ex
}
; Full-width nxv16i8: a whole Z register is loaded, so the backend emits the
; unpredicated LDR (per the change described in the commit message above);
; the PTRUE is now needed only by the predicated CNOT, not by the load.
define <vscale x 16 x i8> @extload_icmp_nxv16i8(ptr %in) #0 {
; CHECK-LABEL: extload_icmp_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cnot z0.b, p0/m, z0.b
; CHECK-NEXT: ret
  ; icmp-eq-zero + zext-to-element-type folds to CNOT.
%ld = load <vscale x 16 x i8>, ptr %in
%cmp = icmp eq <vscale x 16 x i8> %ld, zeroinitializer
%ex = zext <vscale x 16 x i1> %cmp to <vscale x 16 x i8>
ret <vscale x 16 x i8> %ex
}
; Unpacked nxv4i16: i16 elements in .s lanes, so the extending, predicated
; LD1H is required and the LDR rewrite does not apply.
define <vscale x 4 x i16> @extload_icmp_nxv4i16(ptr %in) #0 {
; CHECK-LABEL: extload_icmp_nxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: cnot z0.s, p0/m, z0.s
; CHECK-NEXT: ret
  ; icmp-eq-zero + zext folds to CNOT.
%ld = load <vscale x 4 x i16>, ptr %in
%cmp = icmp eq <vscale x 4 x i16> %ld, zeroinitializer
%ex = zext <vscale x 4 x i1> %cmp to <vscale x 4 x i16>
ret <vscale x 4 x i16> %ex
}
; Full-width nxv8i16: whole-register load, so the unpredicated LDR is used
; and the PTRUE serves only the predicated CNOT.
define <vscale x 8 x i16> @extload_icmp_nxv8i16(ptr %in) #0 {
; CHECK-LABEL: extload_icmp_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cnot z0.h, p0/m, z0.h
; CHECK-NEXT: ret
  ; icmp-eq-zero + zext folds to CNOT.
%ld = load <vscale x 8 x i16>, ptr %in
%cmp = icmp eq <vscale x 8 x i16> %ld, zeroinitializer
%ex = zext <vscale x 8 x i1> %cmp to <vscale x 8 x i16>
ret <vscale x 8 x i16> %ex
}
; Unpacked nxv2i32: i32 elements in .d lanes, so the extending, predicated
; LD1W is required and the LDR rewrite does not apply.
define <vscale x 2 x i32> @extload_icmp_nxv2i32(ptr %in) #0 {
; CHECK-LABEL: extload_icmp_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: cnot z0.d, p0/m, z0.d
; CHECK-NEXT: ret
  ; icmp-eq-zero + zext folds to CNOT.
%ld = load <vscale x 2 x i32>, ptr %in
%cmp = icmp eq <vscale x 2 x i32> %ld, zeroinitializer
%ex = zext <vscale x 2 x i1> %cmp to <vscale x 2 x i32>
ret <vscale x 2 x i32> %ex
}
; Full-width nxv4i32: whole-register load, so the unpredicated LDR is used
; and the PTRUE serves only the predicated CNOT.
define <vscale x 4 x i32> @extload_icmp_nxv4i32(ptr %in) #0 {
; CHECK-LABEL: extload_icmp_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cnot z0.s, p0/m, z0.s
; CHECK-NEXT: ret
  ; icmp-eq-zero + zext folds to CNOT.
%ld = load <vscale x 4 x i32>, ptr %in
%cmp = icmp eq <vscale x 4 x i32> %ld, zeroinitializer
%ex = zext <vscale x 4 x i1> %cmp to <vscale x 4 x i32>
ret <vscale x 4 x i32> %ex
}
; Full-width nxv2i64: whole-register load, so the unpredicated LDR is used
; and the PTRUE serves only the predicated CNOT.
define <vscale x 2 x i64> @extload_icmp_nxv2i64(ptr %in) #0 {
; CHECK-LABEL: extload_icmp_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cnot z0.d, p0/m, z0.d
; CHECK-NEXT: ret
  ; icmp-eq-zero + zext folds to CNOT.
%ld = load <vscale x 2 x i64>, ptr %in
%cmp = icmp eq <vscale x 2 x i64> %ld, zeroinitializer
%ex = zext <vscale x 2 x i1> %cmp to <vscale x 2 x i64>
ret <vscale x 2 x i64> %ex
}
attributes #0 = { "target-features"="+sve" }