
Currently, given:

```cpp
svuint8_t foo(uint8_t *x) {
  return svld1(svptrue_b8(), x);
}
```

We generate:

```gas
foo:
        ptrue   p0.b
        ld1b    { z0.b }, p0/z, [x0]
        ret
```

However, on little-endian targets and with unaligned memory accesses allowed, we could instead use LDR as follows:

```gas
foo:
        ldr     z0, [x0]
        ret
```

The second form avoids the predicate dependency. Likewise for other types and for stores.
93 lines
3.0 KiB
LLVM
93 lines
3.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix CHECK-NO-STRICT-ALIGN %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+strict-align < %s | FileCheck %s

; Copies one <vscale x 16 x i8> vector from %ldptr to %stptr with align 1 on
; both accesses. Expected codegen: with plain +sve, unpredicated ldr/str of z0;
; with +sve,+strict-align, ptrue p0.b plus predicated ld1b/st1b.
define void @nxv16i8(ptr %ldptr, ptr %stptr) {
; CHECK-NO-STRICT-ALIGN-LABEL: nxv16i8:
; CHECK-NO-STRICT-ALIGN:       // %bb.0:
; CHECK-NO-STRICT-ALIGN-NEXT:    ldr z0, [x0]
; CHECK-NO-STRICT-ALIGN-NEXT:    str z0, [x1]
; CHECK-NO-STRICT-ALIGN-NEXT:    ret
;
; CHECK-LABEL: nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    st1b { z0.b }, p0, [x1]
; CHECK-NEXT:    ret
  %l3 = load <vscale x 16 x i8>, ptr %ldptr, align 1
  store <vscale x 16 x i8> %l3, ptr %stptr, align 1
  ret void
}

; Copies one <vscale x 8 x i16> vector from %ldptr to %stptr with align 2 on
; both accesses. Expected codegen: with plain +sve, unpredicated ldr/str of z0;
; with +sve,+strict-align, ptrue p0.h plus predicated ld1h/st1h.
define void @nxv8i16(ptr %ldptr, ptr %stptr) {
; CHECK-NO-STRICT-ALIGN-LABEL: nxv8i16:
; CHECK-NO-STRICT-ALIGN:       // %bb.0:
; CHECK-NO-STRICT-ALIGN-NEXT:    ldr z0, [x0]
; CHECK-NO-STRICT-ALIGN-NEXT:    str z0, [x1]
; CHECK-NO-STRICT-ALIGN-NEXT:    ret
;
; CHECK-LABEL: nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %l3 = load <vscale x 8 x i16>, ptr %ldptr, align 2
  store <vscale x 8 x i16> %l3, ptr %stptr, align 2
  ret void
}

; Copies one <vscale x 4 x i32> vector from %ldptr to %stptr with align 4 on
; both accesses. Expected codegen: with plain +sve, unpredicated ldr/str of z0;
; with +sve,+strict-align, ptrue p0.s plus predicated ld1w/st1w.
define void @nxv4i32(ptr %ldptr, ptr %stptr) {
; CHECK-NO-STRICT-ALIGN-LABEL: nxv4i32:
; CHECK-NO-STRICT-ALIGN:       // %bb.0:
; CHECK-NO-STRICT-ALIGN-NEXT:    ldr z0, [x0]
; CHECK-NO-STRICT-ALIGN-NEXT:    str z0, [x1]
; CHECK-NO-STRICT-ALIGN-NEXT:    ret
;
; CHECK-LABEL: nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %l3 = load <vscale x 4 x i32>, ptr %ldptr, align 4
  store <vscale x 4 x i32> %l3, ptr %stptr, align 4
  ret void
}

; Copies one <vscale x 2 x i64> vector from %ldptr to %stptr with align 8 on
; both accesses. Expected codegen: with plain +sve, unpredicated ldr/str of z0;
; with +sve,+strict-align, ptrue p0.d plus predicated ld1d/st1d.
define void @nxv2i64(ptr %ldptr, ptr %stptr) {
; CHECK-NO-STRICT-ALIGN-LABEL: nxv2i64:
; CHECK-NO-STRICT-ALIGN:       // %bb.0:
; CHECK-NO-STRICT-ALIGN-NEXT:    ldr z0, [x0]
; CHECK-NO-STRICT-ALIGN-NEXT:    str z0, [x1]
; CHECK-NO-STRICT-ALIGN-NEXT:    ret
;
; CHECK-LABEL: nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %l3 = load <vscale x 2 x i64>, ptr %ldptr, align 8
  store <vscale x 2 x i64> %l3, ptr %stptr, align 8
  ret void
}

; Copies one <vscale x 16 x i1> predicate from %ldptr to %stptr with align 2 on
; both accesses. Unlike the data-vector cases in this file, both llc
; configurations (with and without +strict-align) are expected to emit the same
; unpredicated ldr/str of p0.
define void @nxv16i1(ptr %ldptr, ptr %stptr) {
; CHECK-NO-STRICT-ALIGN-LABEL: nxv16i1:
; CHECK-NO-STRICT-ALIGN:       // %bb.0:
; CHECK-NO-STRICT-ALIGN-NEXT:    ldr p0, [x0]
; CHECK-NO-STRICT-ALIGN-NEXT:    str p0, [x1]
; CHECK-NO-STRICT-ALIGN-NEXT:    ret
;
; CHECK-LABEL: nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr p0, [x0]
; CHECK-NEXT:    str p0, [x1]
; CHECK-NEXT:    ret
  %l3 = load <vscale x 16 x i1>, ptr %ldptr, align 2
  store <vscale x 16 x i1> %l3, ptr %stptr, align 2
  ret void
}