
This rewrites the fold from #129756 to apply to all types, including stores of i8s. This required adding a new `aarch64mfp8` MVT to represent FPR8 types on AArch64, which can be used to extract and store 8-bit values using b sub-registers. Follow-on from #129756. Closes #131793.
225 lines
7.2 KiB
LLVM
225 lines
7.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; Test driver: the module is compiled by llc with the SVE feature enabled
; (-mattr=+sve) and the emitted assembly is piped into FileCheck, which matches
; it against the assertion comments attached to each function in this file.
; RUN: llc -mattr=+sve < %s | FileCheck %s
|
|
|
|
; All functions are compiled for 64-bit Arm Linux.
target triple = "aarch64-unknown-linux-gnu"
|
|
|
|
|
|
; Extracts lane 1 of a <4 x i32> unsigned-less-than compare whose right-hand
; operand is the constant splat 5.
; Expected output: lane 1 of %a is moved to a general-purpose register and
; compared as a scalar (mov / cmp #5 / cset lo); no vector compare is emitted.
define i1 @extract_icmp_v4i32_const_splat_rhs(<4 x i32> %a) {
|
|
; CHECK-LABEL: extract_icmp_v4i32_const_splat_rhs:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, v0.s[1]
|
|
; CHECK-NEXT: cmp w8, #5
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
%icmp = icmp ult <4 x i32> %a, splat (i32 5)
|
|
%ext = extractelement <4 x i1> %icmp, i32 1
|
|
ret i1 %ext
|
|
}
|
|
|
|
; Same shape as the constant-splat-RHS case, but the constant splat (7) is the
; left-hand operand of the unsigned-less-than compare.
; Expected output: a scalar compare of lane 1 against #7 with the condition
; flipped to "hi", since the emitted cmp has the vector lane as its first
; operand.
define i1 @extract_icmp_v4i32_const_splat_lhs(<4 x i32> %a) {
|
|
; CHECK-LABEL: extract_icmp_v4i32_const_splat_lhs:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, v0.s[1]
|
|
; CHECK-NEXT: cmp w8, #7
|
|
; CHECK-NEXT: cset w0, hi
|
|
; CHECK-NEXT: ret
|
|
%icmp = icmp ult <4 x i32> splat(i32 7), %a
|
|
%ext = extractelement <4 x i1> %icmp, i32 1
|
|
ret i1 %ext
|
|
}
|
|
|
|
; Extracts lane 1 of an unsigned-less-than compare against a non-splat constant
; vector <5, 234, -1, 7>.
; Expected output: a scalar compare of lane 1 of %a against #234, which is the
; constant's element at the extracted index.
define i1 @extract_icmp_v4i32_const_vec_rhs(<4 x i32> %a) {
|
|
; CHECK-LABEL: extract_icmp_v4i32_const_vec_rhs:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, v0.s[1]
|
|
; CHECK-NEXT: cmp w8, #234
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
%icmp = icmp ult <4 x i32> %a, <i32 5, i32 234, i32 -1, i32 7>
|
|
%ext = extractelement <4 x i1> %icmp, i32 1
|
|
ret i1 %ext
|
|
}
|
|
|
|
; Floating-point variant: extracts lane 1 of a <4 x float> "ult" (unordered or
; less-than) compare against the constant splat 4.0.
; Expected output: lane 1 is moved into a scalar FP register, 4.0 is
; materialized with fmov, and a scalar fcmp / cset lt produces the result.
define i1 @extract_fcmp_v4f32_const_splat_rhs(<4 x float> %a) {
|
|
; CHECK-LABEL: extract_fcmp_v4f32_const_splat_rhs:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov s0, v0.s[1]
|
|
; CHECK-NEXT: fmov s1, #4.00000000
|
|
; CHECK-NEXT: fcmp s0, s1
|
|
; CHECK-NEXT: cset w0, lt
|
|
; CHECK-NEXT: ret
|
|
%fcmp = fcmp ult <4 x float> %a, splat(float 4.0e+0)
|
|
%ext = extractelement <4 x i1> %fcmp, i32 1
|
|
ret i1 %ext
|
|
}
|
|
|
|
; Tests the code in ExpandIntRes_SETCC
; Loads a <1 x i128> from %p, compares it for equality with zero, sign-extends
; the i1 result back to i128 and returns element 0.
; Expected output: the two 64-bit halves are loaded with ldp and OR-ed together
; for the zero test; the 1-bit result is sign-extended with sbfx and copied
; into both return registers (x0 and x1).
|
|
define i128 @extract_icmp_v1i128(ptr %p) {
|
|
; CHECK-LABEL: extract_icmp_v1i128:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldp x9, x8, [x0]
|
|
; CHECK-NEXT: orr x8, x9, x8
|
|
; CHECK-NEXT: cmp x8, #0
|
|
; CHECK-NEXT: cset w8, eq
|
|
; CHECK-NEXT: sbfx x0, x8, #0, #1
|
|
; CHECK-NEXT: mov x1, x0
|
|
; CHECK-NEXT: ret
|
|
%load = load <1 x i128>, ptr %p, align 16
|
|
%cmp = icmp eq <1 x i128> %load, zeroinitializer
|
|
%sext = sext <1 x i1> %cmp to <1 x i128>
|
|
%res = extractelement <1 x i128> %sext, i32 0
|
|
ret i128 %res
|
|
}
|
|
|
|
; Loop with a <2 x i64> vector induction variable (starting at <0, 1>, stepping
; by 2 per iteration) next to a scalar index that runs from 0 to 16 in steps
; of 2. Each iteration compares the vector induction variable against 15
; (unsigned less-than), extracts both lanes of the result, and uses each lane
; to guard a store of the i32 value 1 into %dest at index / index|1.
; Expected output: each lane is moved to a general-purpose register and tested
; with a scalar cmp #14 / b.hi that skips the store; no vector compare is
; emitted inside the loop.
define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
|
|
; CHECK-LABEL: vector_loop_with_icmp:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: index z0.d, #0, #1
|
|
; CHECK-NEXT: mov w8, #2 // =0x2
|
|
; CHECK-NEXT: mov w9, #16 // =0x10
|
|
; CHECK-NEXT: dup v1.2d, x8
|
|
; CHECK-NEXT: add x8, x0, #4
|
|
; CHECK-NEXT: mov w10, #1 // =0x1
|
|
; CHECK-NEXT: b .LBB5_2
|
|
; CHECK-NEXT: .LBB5_1: // %pred.store.continue6
|
|
; CHECK-NEXT: // in Loop: Header=BB5_2 Depth=1
|
|
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
|
|
; CHECK-NEXT: subs x9, x9, #2
|
|
; CHECK-NEXT: add x8, x8, #8
|
|
; CHECK-NEXT: b.eq .LBB5_6
|
|
; CHECK-NEXT: .LBB5_2: // %vector.body
|
|
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
|
; CHECK-NEXT: fmov x11, d0
|
|
; CHECK-NEXT: cmp x11, #14
|
|
; CHECK-NEXT: b.hi .LBB5_4
|
|
; CHECK-NEXT: // %bb.3: // %pred.store.if
|
|
; CHECK-NEXT: // in Loop: Header=BB5_2 Depth=1
|
|
; CHECK-NEXT: stur w10, [x8, #-4]
|
|
; CHECK-NEXT: .LBB5_4: // %pred.store.continue
|
|
; CHECK-NEXT: // in Loop: Header=BB5_2 Depth=1
|
|
; CHECK-NEXT: mov x11, v0.d[1]
|
|
; CHECK-NEXT: cmp x11, #14
|
|
; CHECK-NEXT: b.hi .LBB5_1
|
|
; CHECK-NEXT: // %bb.5: // %pred.store.if5
|
|
; CHECK-NEXT: // in Loop: Header=BB5_2 Depth=1
|
|
; CHECK-NEXT: str w10, [x8]
|
|
; CHECK-NEXT: b .LBB5_1
|
|
; CHECK-NEXT: .LBB5_6: // %for.cond.cleanup
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
br label %vector.body
|
|
|
|
vector.body:
|
|
%index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue6 ]
|
|
%vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %entry ], [ %vec.ind.next, %pred.store.continue6 ]
|
|
%vec.cmp = icmp ult <2 x i64> %vec.ind, <i64 15, i64 15>
|
|
%c0 = extractelement <2 x i1> %vec.cmp, i64 0
|
|
br i1 %c0, label %pred.store.if, label %pred.store.continue
|
|
|
|
pred.store.if:
|
|
%arrayidx = getelementptr inbounds i32, ptr %dest, i64 %index
|
|
store i32 1, ptr %arrayidx, align 4
|
|
br label %pred.store.continue
|
|
|
|
pred.store.continue:
|
|
%c1 = extractelement <2 x i1> %vec.cmp, i64 1
|
|
br i1 %c1, label %pred.store.if5, label %pred.store.continue6
|
|
|
|
pred.store.if5:
|
|
%indexp1 = or disjoint i64 %index, 1
|
|
%arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %indexp1
|
|
store i32 1, ptr %arrayidx2, align 4
|
|
br label %pred.store.continue6
|
|
|
|
pred.store.continue6:
|
|
%index.next = add i64 %index, 2
|
|
%vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
|
|
%index.cmp = icmp eq i64 %index.next, 16
|
|
br i1 %index.cmp, label %for.cond.cleanup, label %vector.body
|
|
|
|
for.cond.cleanup:
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
; TODO: Combine the sbfx(cset) into a csetm
; Chain of two vector compares: %v is compared "ule" against zero, the result
; is sign-extended to <4 x i32>, and zero is then compared "sge" against that
; value; lane 0 of the second compare is sign-extended to i32 and returned.
; Expected output: both compares are done on scalars in general-purpose
; registers, with the intermediate sign-extension emitted as cset + sbfx.
; NOTE(review): presumably a regression test for issue #121372, going by the
; function name -- confirm against the issue tracker.
|
|
define i32 @issue_121372(<4 x i32> %v) {
|
|
; CHECK-LABEL: issue_121372:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fmov w8, s0
|
|
; CHECK-NEXT: cmp w8, #0
|
|
; CHECK-NEXT: cset w8, eq
|
|
; CHECK-NEXT: sbfx w8, w8, #0, #1
|
|
; CHECK-NEXT: cmp w8, #1
|
|
; CHECK-NEXT: csetm w0, lt
|
|
; CHECK-NEXT: ret
|
|
%cmp_ule = icmp ule <4 x i32> %v, zeroinitializer
|
|
%sext_v4i1 = sext <4 x i1> %cmp_ule to <4 x i32>
|
|
%cmp_sge = icmp sge <4 x i32> zeroinitializer, %sext_v4i1
|
|
%ext = extractelement <4 x i1> %cmp_sge, i32 0
|
|
%res = sext i1 %ext to i32
|
|
ret i32 %res
|
|
}
|
|
|
|
|
|
; Negative tests
|
|
|
|
; Negative case: the right-hand operand is a splat of the non-constant scalar
; %b (built with insertelement + shufflevector) rather than a constant.
; Expected output: the compare stays in vector form (dup / cmhi), the result is
; narrowed with xtn, and lane 1 is then read out with umov and masked to 1 bit.
define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
|
|
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: dup v1.4s, w0
|
|
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
|
|
; CHECK-NEXT: xtn v0.4h, v0.4s
|
|
; CHECK-NEXT: umov w8, v0.h[1]
|
|
; CHECK-NEXT: and w0, w8, #0x1
|
|
; CHECK-NEXT: ret
|
|
%ins = insertelement <4 x i32> poison, i32 %b, i32 0
|
|
%splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
|
|
%icmp = icmp ult <4 x i32> %a, %splat
|
|
%ext = extractelement <4 x i1> %icmp, i32 1
|
|
ret i1 %ext
|
|
}
|
|
|
|
; Negative case: the vector compare result has a second use. Besides the
; extract of lane 1, the whole <4 x i1> value is stored to %p.
; Expected output: the vector compare (cmhi) is kept. The stored value is
; produced by masking the compare result with a constant-pool vector, reducing
; it with addv and writing one byte via "str b0"; the returned bit is read
; from the narrowed compare result with umov and masked to 1 bit.
define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
|
|
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: movi v1.4s, #235
|
|
; CHECK-NEXT: adrp x8, .LCPI8_0
|
|
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
|
|
; CHECK-NEXT: mov x8, x0
|
|
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
|
|
; CHECK-NEXT: xtn v1.4h, v0.4s
|
|
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
|
|
; CHECK-NEXT: addv s0, v0.4s
|
|
; CHECK-NEXT: umov w9, v1.h[1]
|
|
; CHECK-NEXT: str b0, [x8]
|
|
; CHECK-NEXT: and w0, w9, #0x1
|
|
; CHECK-NEXT: ret
|
|
%icmp = icmp ult <4 x i32> %a, splat(i32 235)
|
|
%ext = extractelement <4 x i1> %icmp, i32 1
|
|
store <4 x i1> %icmp, ptr %p, align 4
|
|
ret i1 %ext
|
|
}
|
|
|
|
; Negative case: the extracted lane index is the runtime value %c instead of a
; constant.
; Expected output: the vector compare (cmhi) is kept. The narrowed result is
; spilled to a 16-byte stack slot and the requested lane is read back with a
; halfword load from an address formed by inserting the index (bfi) into the
; slot address; the loaded value is masked to 1 bit.
define i1 @extract_icmp_v4i32_splat_rhs_unknown_idx(<4 x i32> %a, i32 %c) {
|
|
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_unknown_idx:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: sub sp, sp, #16
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-NEXT: movi v1.4s, #127
|
|
; CHECK-NEXT: add x8, sp, #8
|
|
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
|
; CHECK-NEXT: bfi x8, x0, #1, #2
|
|
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
|
|
; CHECK-NEXT: xtn v0.4h, v0.4s
|
|
; CHECK-NEXT: str d0, [sp, #8]
|
|
; CHECK-NEXT: ldrh w8, [x8]
|
|
; CHECK-NEXT: and w0, w8, #0x1
|
|
; CHECK-NEXT: add sp, sp, #16
|
|
; CHECK-NEXT: ret
|
|
%icmp = icmp ult <4 x i32> %a, splat(i32 127)
|
|
%ext = extractelement <4 x i1> %icmp, i32 %c
|
|
ret i1 %ext
|
|
}
|
|
|