
Refresh the generic AArch64 scheduling model to be based on the Cortex-A510 instead of the Cortex-A55. The main benefits are to little cores, and the refresh also introduces SVE scheduling information. The changes were tested on various out-of-order cores and no performance degradation was seen.

Differential Revision: https://reviews.llvm.org/D156799
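As a rough sketch of how the new scheduling information can be inspected (not part of this patch; the file name below is hypothetical, and an LLVM build containing this change is assumed):

    # Compile an SVE test to assembly, then let llvm-mca report latency and
    # throughput estimates taken from the generic (-mcpu=generic) model.
    llc -mtriple=aarch64-linux-gnu -mattr=+sve -o reduce.s sve-pred-reduce.ll
    llvm-mca -mtriple=aarch64-linux-gnu -mcpu=generic -mattr=+sve reduce.s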
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
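; The inputs below are wider than a single SVE predicate register
; (<vscale x 32 x i1> spans two, <vscale x 64 x i1> spans four), so the
; expected code first combines the parts with predicate ops (and/sel/eor)
; and then reduces to a scalar via a flag-setting operation (nots/ptest)
; or an element count (cntp), followed by cset.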

; ANDV

define i1 @andv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: andv_nxv32i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    nots p0.b, p2/z, p0.b
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
  %res = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

define i1 @andv_nxv64i1(<vscale x 64 x i1> %a) {
; CHECK-LABEL: andv_nxv64i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and p1.b, p1/z, p1.b, p3.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p2.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    nots p0.b, p1/z, p0.b
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
  %res = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %a)
  ret i1 %res
}

; ORV

define i1 @orv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: orv_nxv32i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT:    ptest p0, p0.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %res = call i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

; XORV

define i1 @xorv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: xorv_nxv32i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.b
; CHECK-NEXT:    eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT:    cntp x8, p2, p0.b
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %res = call i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

; SMAXV

define i1 @smaxv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: smaxv_nxv32i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    nots p0.b, p2/z, p0.b
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
  %res = call i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

; SMINV

define i1 @sminv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: sminv_nxv32i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT:    ptest p0, p0.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %res = call i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

; UMAXV

define i1 @umaxv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: umaxv_nxv32i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT:    ptest p0, p0.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %res = call i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

; UMINV

define i1 @uminv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: uminv_nxv32i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    nots p0.b, p2/z, p0.b
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
  %res = call i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

declare i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1>)
declare i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1>)

declare i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1>)

declare i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1>)

declare i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1>)

declare i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1>)

declare i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1>)

declare i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1>)