llvm-project/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll
Björn Pettersson 5e5e300d07
[SelectionDAG] Fix bug related to demanded bits/elts for BITCAST (#145902)
When we have a BITCAST and the source type is a vector with smaller
elements compared to the destination type, then we need to demand all
the source elements that make up the demanded elts for the result when
doing recursive calls to SimplifyDemandedBits,
SimplifyDemandedVectorElts and SimplifyMultipleUseDemandedBits. Problem
is that those simplifications are allowed to turn non-demanded elements
of a vector into POISON, so unless we demand all source elements that
make up the result there is a risk that the result would be more
poisonous (even for demanded elts) after the simplification.

The patch fixes some bugs in SimplifyMultipleUseDemandedBits and
SimplifyDemandedBits for situations when we did not consider the problem
described above. Now we make sure that we also demand vector elements
that "must not be turned into poison" even if those elements correspond
to bits that do not need to be defined according to the DemandedBits
mask.

Fixes #138513
2026-02-23 14:38:07 +01:00

581 lines
23 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 -mattr=+bf16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,NEON-FIXED
; RUN: llc -mtriple=aarch64 -mattr=+sve,+bf16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SVE-FIXED
;; Fixed-width <16 x i8> case. The <16 x i1> predicate is produced by comparing
;; %mask against zero and is passed, together with %data and %passthru, to
;; llvm.experimental.vector.extract.last.active. In the expected output %data is
;; stored to a 16-byte stack slot, one byte is loaded back from it (ldrb), and a
;; final csel picks between that byte and w0.
define i8 @extract_last_i8(<16 x i8> %data, <16 x i8> %mask, i8 %passthru) {
; NEON-FIXED-LABEL: extract_last_i8:
; NEON-FIXED: // %bb.0:
; NEON-FIXED-NEXT: sub sp, sp, #16
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT: cmeq v2.16b, v1.16b, #0
; NEON-FIXED-NEXT: adrp x8, .LCPI0_0
; NEON-FIXED-NEXT: cmtst v1.16b, v1.16b, v1.16b
; NEON-FIXED-NEXT: ldr q3, [x8, :lo12:.LCPI0_0]
; NEON-FIXED-NEXT: mov x9, sp
; NEON-FIXED-NEXT: str q0, [sp]
; NEON-FIXED-NEXT: bic v2.16b, v3.16b, v2.16b
; NEON-FIXED-NEXT: umaxv b1, v1.16b
; NEON-FIXED-NEXT: umaxv b2, v2.16b
; NEON-FIXED-NEXT: fmov w8, s2
; NEON-FIXED-NEXT: bfxil x9, x8, #0, #4
; NEON-FIXED-NEXT: ldrb w8, [x9]
; NEON-FIXED-NEXT: fmov w9, s1
; NEON-FIXED-NEXT: tst w9, #0x1
; NEON-FIXED-NEXT: csel w0, w8, w0, ne
; NEON-FIXED-NEXT: add sp, sp, #16
; NEON-FIXED-NEXT: ret
;
; SVE-FIXED-LABEL: extract_last_i8:
; SVE-FIXED: // %bb.0:
; SVE-FIXED-NEXT: sub sp, sp, #16
; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: index z2.b, #0, #1
; SVE-FIXED-NEXT: cmeq v3.16b, v1.16b, #0
; SVE-FIXED-NEXT: cmtst v1.16b, v1.16b, v1.16b
; SVE-FIXED-NEXT: mov x9, sp
; SVE-FIXED-NEXT: str q0, [sp]
; SVE-FIXED-NEXT: bic v2.16b, v2.16b, v3.16b
; SVE-FIXED-NEXT: umaxv b1, v1.16b
; SVE-FIXED-NEXT: umaxv b2, v2.16b
; SVE-FIXED-NEXT: fmov w8, s2
; SVE-FIXED-NEXT: bfxil x9, x8, #0, #4
; SVE-FIXED-NEXT: ldrb w8, [x9]
; SVE-FIXED-NEXT: fmov w9, s1
; SVE-FIXED-NEXT: tst w9, #0x1
; SVE-FIXED-NEXT: csel w0, w8, w0, ne
; SVE-FIXED-NEXT: add sp, sp, #16
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <16 x i8> %mask, zeroinitializer
%res = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> %data, <16 x i1> %notzero, i8 %passthru)
ret i8 %res
}
;; Fixed-width <8 x i16> case. The <8 x i1> predicate comes from
;; `icmp ne %mask, zeroinitializer`. The expected output stores %data to the
;; stack, inserts the computed lane number into the stack address at bit 1
;; (bfi ..., #1, #3), loads a halfword (ldrh) and selects between it and w0.
define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) {
; NEON-FIXED-LABEL: extract_last_i16:
; NEON-FIXED: // %bb.0:
; NEON-FIXED-NEXT: sub sp, sp, #16
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h
; NEON-FIXED-NEXT: adrp x8, .LCPI1_0
; NEON-FIXED-NEXT: mov x9, sp
; NEON-FIXED-NEXT: ldr d3, [x8, :lo12:.LCPI1_0]
; NEON-FIXED-NEXT: str q0, [sp]
; NEON-FIXED-NEXT: xtn v2.8b, v1.8h
; NEON-FIXED-NEXT: umaxv h1, v1.8h
; NEON-FIXED-NEXT: and v2.8b, v2.8b, v3.8b
; NEON-FIXED-NEXT: umaxv b2, v2.8b
; NEON-FIXED-NEXT: fmov w8, s2
; NEON-FIXED-NEXT: bfi x9, x8, #1, #3
; NEON-FIXED-NEXT: ldrh w8, [x9]
; NEON-FIXED-NEXT: fmov w9, s1
; NEON-FIXED-NEXT: tst w9, #0x1
; NEON-FIXED-NEXT: csel w0, w8, w0, ne
; NEON-FIXED-NEXT: add sp, sp, #16
; NEON-FIXED-NEXT: ret
;
; SVE-FIXED-LABEL: extract_last_i16:
; SVE-FIXED: // %bb.0:
; SVE-FIXED-NEXT: sub sp, sp, #16
; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h
; SVE-FIXED-NEXT: index z3.b, #0, #1
; SVE-FIXED-NEXT: mov x9, sp
; SVE-FIXED-NEXT: str q0, [sp]
; SVE-FIXED-NEXT: xtn v2.8b, v1.8h
; SVE-FIXED-NEXT: umaxv h1, v1.8h
; SVE-FIXED-NEXT: and v2.8b, v2.8b, v3.8b
; SVE-FIXED-NEXT: umaxv b2, v2.8b
; SVE-FIXED-NEXT: fmov w8, s2
; SVE-FIXED-NEXT: bfi x9, x8, #1, #3
; SVE-FIXED-NEXT: ldrh w8, [x9]
; SVE-FIXED-NEXT: fmov w9, s1
; SVE-FIXED-NEXT: tst w9, #0x1
; SVE-FIXED-NEXT: csel w0, w8, w0, ne
; SVE-FIXED-NEXT: add sp, sp, #16
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <8 x i16> %mask, zeroinitializer
%res = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> %data, <8 x i1> %notzero, i16 %passthru)
ret i16 %res
}
;; Fixed-width <4 x i32> case. The <4 x i1> predicate comes from
;; `icmp ne %mask, zeroinitializer`. The expected output stores %data to the
;; stack, inserts the computed lane number into the stack address at bit 2
;; (bfi ..., #2, #2), loads a word and selects between it and w0.
define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) {
; NEON-FIXED-LABEL: extract_last_i32:
; NEON-FIXED: // %bb.0:
; NEON-FIXED-NEXT: sub sp, sp, #16
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT: cmtst v1.4s, v1.4s, v1.4s
; NEON-FIXED-NEXT: adrp x8, .LCPI2_0
; NEON-FIXED-NEXT: mov x9, sp
; NEON-FIXED-NEXT: ldr d3, [x8, :lo12:.LCPI2_0]
; NEON-FIXED-NEXT: str q0, [sp]
; NEON-FIXED-NEXT: xtn v2.4h, v1.4s
; NEON-FIXED-NEXT: umaxv s1, v1.4s
; NEON-FIXED-NEXT: and v2.8b, v2.8b, v3.8b
; NEON-FIXED-NEXT: umaxv h2, v2.4h
; NEON-FIXED-NEXT: fmov w8, s2
; NEON-FIXED-NEXT: bfi x9, x8, #2, #2
; NEON-FIXED-NEXT: ldr w8, [x9]
; NEON-FIXED-NEXT: fmov w9, s1
; NEON-FIXED-NEXT: tst w9, #0x1
; NEON-FIXED-NEXT: csel w0, w8, w0, ne
; NEON-FIXED-NEXT: add sp, sp, #16
; NEON-FIXED-NEXT: ret
;
; SVE-FIXED-LABEL: extract_last_i32:
; SVE-FIXED: // %bb.0:
; SVE-FIXED-NEXT: sub sp, sp, #16
; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.4s, v1.4s, v1.4s
; SVE-FIXED-NEXT: index z3.h, #0, #1
; SVE-FIXED-NEXT: mov x9, sp
; SVE-FIXED-NEXT: str q0, [sp]
; SVE-FIXED-NEXT: xtn v2.4h, v1.4s
; SVE-FIXED-NEXT: umaxv s1, v1.4s
; SVE-FIXED-NEXT: and v2.8b, v2.8b, v3.8b
; SVE-FIXED-NEXT: umaxv h2, v2.4h
; SVE-FIXED-NEXT: fmov w8, s2
; SVE-FIXED-NEXT: bfi x9, x8, #2, #2
; SVE-FIXED-NEXT: ldr w8, [x9]
; SVE-FIXED-NEXT: fmov w9, s1
; SVE-FIXED-NEXT: tst w9, #0x1
; SVE-FIXED-NEXT: csel w0, w8, w0, ne
; SVE-FIXED-NEXT: add sp, sp, #16
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <4 x i32> %mask, zeroinitializer
%res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %data, <4 x i1> %notzero, i32 %passthru)
ret i32 %res
}
;; Fixed-width <2 x i64> case. The <2 x i1> predicate comes from
;; `icmp ne %mask, zeroinitializer`. The expected output stores %data to the
;; stack, inserts the computed lane number into the stack address at bit 3
;; (bfi ..., #3, #1), loads a doubleword and selects between it and x0.
define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) {
; NEON-FIXED-LABEL: extract_last_i64:
; NEON-FIXED: // %bb.0:
; NEON-FIXED-NEXT: sub sp, sp, #16
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d
; NEON-FIXED-NEXT: adrp x8, .LCPI3_0
; NEON-FIXED-NEXT: mov x9, sp
; NEON-FIXED-NEXT: ldr d3, [x8, :lo12:.LCPI3_0]
; NEON-FIXED-NEXT: str q0, [sp]
; NEON-FIXED-NEXT: xtn v2.2s, v1.2d
; NEON-FIXED-NEXT: umaxv s1, v1.4s
; NEON-FIXED-NEXT: and v2.8b, v2.8b, v3.8b
; NEON-FIXED-NEXT: umaxp v2.2s, v2.2s, v2.2s
; NEON-FIXED-NEXT: fmov w8, s2
; NEON-FIXED-NEXT: bfi x9, x8, #3, #1
; NEON-FIXED-NEXT: ldr x8, [x9]
; NEON-FIXED-NEXT: fmov w9, s1
; NEON-FIXED-NEXT: tst w9, #0x1
; NEON-FIXED-NEXT: csel x0, x8, x0, ne
; NEON-FIXED-NEXT: add sp, sp, #16
; NEON-FIXED-NEXT: ret
;
; SVE-FIXED-LABEL: extract_last_i64:
; SVE-FIXED: // %bb.0:
; SVE-FIXED-NEXT: sub sp, sp, #16
; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d
; SVE-FIXED-NEXT: index z3.s, #0, #1
; SVE-FIXED-NEXT: mov x9, sp
; SVE-FIXED-NEXT: str q0, [sp]
; SVE-FIXED-NEXT: xtn v2.2s, v1.2d
; SVE-FIXED-NEXT: umaxv s1, v1.4s
; SVE-FIXED-NEXT: and v2.8b, v2.8b, v3.8b
; SVE-FIXED-NEXT: umaxp v2.2s, v2.2s, v2.2s
; SVE-FIXED-NEXT: fmov w8, s2
; SVE-FIXED-NEXT: bfi x9, x8, #3, #1
; SVE-FIXED-NEXT: ldr x8, [x9]
; SVE-FIXED-NEXT: fmov w9, s1
; SVE-FIXED-NEXT: tst w9, #0x1
; SVE-FIXED-NEXT: csel x0, x8, x0, ne
; SVE-FIXED-NEXT: add sp, sp, #16
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <2 x i64> %mask, zeroinitializer
%res = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> %data, <2 x i1> %notzero, i64 %passthru)
ret i64 %res
}
;; Fixed-width <8 x half> data with an <8 x i16> mask source; the predicate is
;; `icmp ne %mask, zeroinitializer`. The element is loaded from the stack copy
;; of %data with `ldr h0`. The two RUN configurations differ only in the final
;; select: the NEON-only output uses an s-register fcsel (with subregister
;; kill annotations), while the +sve output uses an h-register fcsel.
define half @extract_last_half(<8 x half> %data, <8 x i16> %mask, half %passthru) {
; NEON-FIXED-LABEL: extract_last_half:
; NEON-FIXED: // %bb.0:
; NEON-FIXED-NEXT: sub sp, sp, #16
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h
; NEON-FIXED-NEXT: adrp x8, .LCPI4_0
; NEON-FIXED-NEXT: mov x9, sp
; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI4_0]
; NEON-FIXED-NEXT: str q0, [sp]
; NEON-FIXED-NEXT: // kill: def $h2 killed $h2 def $s2
; NEON-FIXED-NEXT: xtn v3.8b, v1.8h
; NEON-FIXED-NEXT: umaxv h1, v1.8h
; NEON-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
; NEON-FIXED-NEXT: umaxv b3, v3.8b
; NEON-FIXED-NEXT: fmov w8, s3
; NEON-FIXED-NEXT: bfi x9, x8, #1, #3
; NEON-FIXED-NEXT: fmov w8, s1
; NEON-FIXED-NEXT: ldr h0, [x9]
; NEON-FIXED-NEXT: tst w8, #0x1
; NEON-FIXED-NEXT: fcsel s0, s0, s2, ne
; NEON-FIXED-NEXT: // kill: def $h0 killed $h0 killed $s0
; NEON-FIXED-NEXT: add sp, sp, #16
; NEON-FIXED-NEXT: ret
;
; SVE-FIXED-LABEL: extract_last_half:
; SVE-FIXED: // %bb.0:
; SVE-FIXED-NEXT: sub sp, sp, #16
; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h
; SVE-FIXED-NEXT: index z4.b, #0, #1
; SVE-FIXED-NEXT: mov x9, sp
; SVE-FIXED-NEXT: str q0, [sp]
; SVE-FIXED-NEXT: xtn v3.8b, v1.8h
; SVE-FIXED-NEXT: umaxv h1, v1.8h
; SVE-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
; SVE-FIXED-NEXT: umaxv b3, v3.8b
; SVE-FIXED-NEXT: fmov w8, s3
; SVE-FIXED-NEXT: bfi x9, x8, #1, #3
; SVE-FIXED-NEXT: fmov w8, s1
; SVE-FIXED-NEXT: ldr h0, [x9]
; SVE-FIXED-NEXT: tst w8, #0x1
; SVE-FIXED-NEXT: fcsel h0, h0, h2, ne
; SVE-FIXED-NEXT: add sp, sp, #16
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <8 x i16> %mask, zeroinitializer
%res = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> %data, <8 x i1> %notzero, half %passthru)
ret half %res
}
;; Fixed-width <8 x bfloat> data with an <8 x i16> mask source; the predicate is
;; `icmp ne %mask, zeroinitializer`. The expected output has the same shape as
;; the half-precision test: the element is loaded with `ldr h0`, and the final
;; select is an s-register fcsel in the NEON-only output and an h-register
;; fcsel in the +sve output.
define bfloat @extract_last_bfloat(<8 x bfloat> %data, <8 x i16> %mask, bfloat %passthru) {
; NEON-FIXED-LABEL: extract_last_bfloat:
; NEON-FIXED: // %bb.0:
; NEON-FIXED-NEXT: sub sp, sp, #16
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h
; NEON-FIXED-NEXT: adrp x8, .LCPI5_0
; NEON-FIXED-NEXT: mov x9, sp
; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI5_0]
; NEON-FIXED-NEXT: str q0, [sp]
; NEON-FIXED-NEXT: // kill: def $h2 killed $h2 def $s2
; NEON-FIXED-NEXT: xtn v3.8b, v1.8h
; NEON-FIXED-NEXT: umaxv h1, v1.8h
; NEON-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
; NEON-FIXED-NEXT: umaxv b3, v3.8b
; NEON-FIXED-NEXT: fmov w8, s3
; NEON-FIXED-NEXT: bfi x9, x8, #1, #3
; NEON-FIXED-NEXT: fmov w8, s1
; NEON-FIXED-NEXT: ldr h0, [x9]
; NEON-FIXED-NEXT: tst w8, #0x1
; NEON-FIXED-NEXT: fcsel s0, s0, s2, ne
; NEON-FIXED-NEXT: // kill: def $h0 killed $h0 killed $s0
; NEON-FIXED-NEXT: add sp, sp, #16
; NEON-FIXED-NEXT: ret
;
; SVE-FIXED-LABEL: extract_last_bfloat:
; SVE-FIXED: // %bb.0:
; SVE-FIXED-NEXT: sub sp, sp, #16
; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h
; SVE-FIXED-NEXT: index z4.b, #0, #1
; SVE-FIXED-NEXT: mov x9, sp
; SVE-FIXED-NEXT: str q0, [sp]
; SVE-FIXED-NEXT: xtn v3.8b, v1.8h
; SVE-FIXED-NEXT: umaxv h1, v1.8h
; SVE-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
; SVE-FIXED-NEXT: umaxv b3, v3.8b
; SVE-FIXED-NEXT: fmov w8, s3
; SVE-FIXED-NEXT: bfi x9, x8, #1, #3
; SVE-FIXED-NEXT: fmov w8, s1
; SVE-FIXED-NEXT: ldr h0, [x9]
; SVE-FIXED-NEXT: tst w8, #0x1
; SVE-FIXED-NEXT: fcsel h0, h0, h2, ne
; SVE-FIXED-NEXT: add sp, sp, #16
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <8 x i16> %mask, zeroinitializer
%res = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> %data, <8 x i1> %notzero, bfloat %passthru)
ret bfloat %res
}
;; Fixed-width <4 x float> data with a <4 x i32> mask source; the predicate is
;; `icmp ne %mask, zeroinitializer`. The expected output loads the element from
;; the stack copy of %data into s0 and uses fcsel to pick between it and s2.
define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %passthru) {
; NEON-FIXED-LABEL: extract_last_float:
; NEON-FIXED: // %bb.0:
; NEON-FIXED-NEXT: sub sp, sp, #16
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT: cmtst v1.4s, v1.4s, v1.4s
; NEON-FIXED-NEXT: adrp x8, .LCPI6_0
; NEON-FIXED-NEXT: mov x9, sp
; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI6_0]
; NEON-FIXED-NEXT: str q0, [sp]
; NEON-FIXED-NEXT: xtn v3.4h, v1.4s
; NEON-FIXED-NEXT: umaxv s1, v1.4s
; NEON-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
; NEON-FIXED-NEXT: umaxv h3, v3.4h
; NEON-FIXED-NEXT: fmov w8, s3
; NEON-FIXED-NEXT: bfi x9, x8, #2, #2
; NEON-FIXED-NEXT: fmov w8, s1
; NEON-FIXED-NEXT: ldr s0, [x9]
; NEON-FIXED-NEXT: tst w8, #0x1
; NEON-FIXED-NEXT: fcsel s0, s0, s2, ne
; NEON-FIXED-NEXT: add sp, sp, #16
; NEON-FIXED-NEXT: ret
;
; SVE-FIXED-LABEL: extract_last_float:
; SVE-FIXED: // %bb.0:
; SVE-FIXED-NEXT: sub sp, sp, #16
; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.4s, v1.4s, v1.4s
; SVE-FIXED-NEXT: index z4.h, #0, #1
; SVE-FIXED-NEXT: mov x9, sp
; SVE-FIXED-NEXT: str q0, [sp]
; SVE-FIXED-NEXT: xtn v3.4h, v1.4s
; SVE-FIXED-NEXT: umaxv s1, v1.4s
; SVE-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
; SVE-FIXED-NEXT: umaxv h3, v3.4h
; SVE-FIXED-NEXT: fmov w8, s3
; SVE-FIXED-NEXT: bfi x9, x8, #2, #2
; SVE-FIXED-NEXT: fmov w8, s1
; SVE-FIXED-NEXT: ldr s0, [x9]
; SVE-FIXED-NEXT: tst w8, #0x1
; SVE-FIXED-NEXT: fcsel s0, s0, s2, ne
; SVE-FIXED-NEXT: add sp, sp, #16
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <4 x i32> %mask, zeroinitializer
%res = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> %data, <4 x i1> %notzero, float %passthru)
ret float %res
}
;; Fixed-width <2 x double> data with a <2 x i64> mask source; the predicate is
;; `icmp ne %mask, zeroinitializer`. The expected output loads the element from
;; the stack copy of %data into d0 and uses fcsel to pick between it and d2.
define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double %passthru) {
; NEON-FIXED-LABEL: extract_last_double:
; NEON-FIXED: // %bb.0:
; NEON-FIXED-NEXT: sub sp, sp, #16
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d
; NEON-FIXED-NEXT: adrp x8, .LCPI7_0
; NEON-FIXED-NEXT: mov x9, sp
; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI7_0]
; NEON-FIXED-NEXT: str q0, [sp]
; NEON-FIXED-NEXT: xtn v3.2s, v1.2d
; NEON-FIXED-NEXT: umaxv s1, v1.4s
; NEON-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
; NEON-FIXED-NEXT: umaxp v3.2s, v3.2s, v3.2s
; NEON-FIXED-NEXT: fmov w8, s3
; NEON-FIXED-NEXT: bfi x9, x8, #3, #1
; NEON-FIXED-NEXT: fmov w8, s1
; NEON-FIXED-NEXT: ldr d0, [x9]
; NEON-FIXED-NEXT: tst w8, #0x1
; NEON-FIXED-NEXT: fcsel d0, d0, d2, ne
; NEON-FIXED-NEXT: add sp, sp, #16
; NEON-FIXED-NEXT: ret
;
; SVE-FIXED-LABEL: extract_last_double:
; SVE-FIXED: // %bb.0:
; SVE-FIXED-NEXT: sub sp, sp, #16
; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d
; SVE-FIXED-NEXT: index z4.s, #0, #1
; SVE-FIXED-NEXT: mov x9, sp
; SVE-FIXED-NEXT: str q0, [sp]
; SVE-FIXED-NEXT: xtn v3.2s, v1.2d
; SVE-FIXED-NEXT: umaxv s1, v1.4s
; SVE-FIXED-NEXT: and v3.8b, v3.8b, v4.8b
; SVE-FIXED-NEXT: umaxp v3.2s, v3.2s, v3.2s
; SVE-FIXED-NEXT: fmov w8, s3
; SVE-FIXED-NEXT: bfi x9, x8, #3, #1
; SVE-FIXED-NEXT: fmov w8, s1
; SVE-FIXED-NEXT: ldr d0, [x9]
; SVE-FIXED-NEXT: tst w8, #0x1
; SVE-FIXED-NEXT: fcsel d0, d0, d2, ne
; SVE-FIXED-NEXT: add sp, sp, #16
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <2 x i64> %mask, zeroinitializer
%res = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> %data, <2 x i1> %notzero, double %passthru)
ret double %res
}
;; Scalable <vscale x 16 x i8> case, compiled with attribute group #0. The mask
;; is passed in directly as a predicate; the whole operation is expected to
;; become a single clastb on .b elements with w0 as both source and result.
define i8 @extract_last_i8_scalable(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru) #0 {
; CHECK-LABEL: extract_last_i8_scalable:
; CHECK: // %bb.0:
; CHECK-NEXT: clastb w0, p0, w0, z0.b
; CHECK-NEXT: ret
%res = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru)
ret i8 %res
}
;; Scalable <vscale x 8 x i16> case, compiled with attribute group #0. Expected
;; to become a single clastb on .h elements with w0 as both source and result.
define i16 @extract_last_i16_scalable(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru) #0 {
; CHECK-LABEL: extract_last_i16_scalable:
; CHECK: // %bb.0:
; CHECK-NEXT: clastb w0, p0, w0, z0.h
; CHECK-NEXT: ret
%res = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru)
ret i16 %res
}
;; Scalable <vscale x 4 x i32> case, compiled with attribute group #0. Expected
;; to become a single clastb on .s elements with w0 as both source and result.
define i32 @extract_last_i32_scalable(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru) #0 {
; CHECK-LABEL: extract_last_i32_scalable:
; CHECK: // %bb.0:
; CHECK-NEXT: clastb w0, p0, w0, z0.s
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru)
ret i32 %res
}
;; Scalable <vscale x 2 x i64> case, compiled with attribute group #0. Expected
;; to become a single clastb on .d elements with x0 as both source and result.
define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru) #0 {
; CHECK-LABEL: extract_last_i64_scalable:
; CHECK: // %bb.0:
; CHECK-NEXT: clastb x0, p0, x0, z0.d
; CHECK-NEXT: ret
%res = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru)
ret i64 %res
}
;; Scalable <vscale x 8 x half> case, compiled with attribute group #0. The
;; expected output applies clastb to h1 in place on .h elements and then copies
;; the result to register 0 with fmov.
define half @extract_last_half_scalable(<vscale x 8 x half> %data, <vscale x 8 x i1> %mask, half %passthru) #0 {
; CHECK-LABEL: extract_last_half_scalable:
; CHECK: // %bb.0:
; CHECK-NEXT: clastb h1, p0, h1, z0.h
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: ret
%res = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %mask, half %passthru)
ret half %res
}
;; Scalable <vscale x 8 x bfloat> case, compiled with attribute group #0. The
;; expected output matches the half-precision scalable test: clastb on h1 in
;; place over .h elements, followed by an fmov into register 0.
define bfloat @extract_last_bfloat_scalable(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask, bfloat %passthru) #0 {
; CHECK-LABEL: extract_last_bfloat_scalable:
; CHECK: // %bb.0:
; CHECK-NEXT: clastb h1, p0, h1, z0.h
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: ret
%res = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask, bfloat %passthru)
ret bfloat %res
}
;; Scalable <vscale x 4 x float> case, compiled with attribute group #0. The
;; expected output applies clastb to s1 in place on .s elements and then copies
;; the result to s0 with fmov.
define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru) #0 {
; CHECK-LABEL: extract_last_float_scalable:
; CHECK: // %bb.0:
; CHECK-NEXT: clastb s1, p0, s1, z0.s
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: ret
%res = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru)
ret float %res
}
;; Scalable <vscale x 2 x double> case, compiled with attribute group #0. The
;; expected output applies clastb to d1 in place on .d elements and then copies
;; the result to d0 with fmov.
define double @extract_last_double_scalable(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru) #0 {
; CHECK-LABEL: extract_last_double_scalable:
; CHECK: // %bb.0:
; CHECK-NEXT: clastb d1, p0, d1, z0.d
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%res = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru)
ret double %res
}
;; If the passthru parameter is poison, we shouldn't see a select at the end.
;; The intrinsic is called with `i8 poison` as its third operand, and the
;; expected output is a plain lastb (no clastb and no csel).
define i8 @extract_last_i8_scalable_poison_passthru(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: extract_last_i8_scalable_poison_passthru:
; CHECK: // %bb.0:
; CHECK-NEXT: lastb w0, p0, z0.b
; CHECK-NEXT: ret
%res = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 poison)
ret i8 %res
}
;; (c)lastb doesn't exist for predicate types; check we get functional codegen
;; Here both %data and %mask are <vscale x 16 x i1> and the passthru operand is
;; the constant `i1 false`. In the expected output the data predicate (p0) is
;; first expanded into a byte vector of 0/1 values, lastb is applied under the
;; mask predicate (p1), and the result is ANDed with a flag (cset ne) derived
;; from a ptest of the mask.
define i1 @extract_last_i1_scalable(<vscale x 16 x i1> %data, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: extract_last_i1_scalable:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT: ptest p1, p1.b
; CHECK-NEXT: cset w9, ne
; CHECK-NEXT: lastb w8, p1, z0.b
; CHECK-NEXT: and w0, w9, w8
; CHECK-NEXT: ret
%res = call i1 @llvm.experimental.vector.extract.last.active.nxv16i1(<vscale x 16 x i1> %data, <vscale x 16 x i1> %mask, i1 false)
ret i1 %res
}
; Test v3i32 - non-power-of-2 element count that requires mask widening
; (v3i1 -> v4i1) via WidenVecOp_VECTOR_FIND_LAST_ACTIVE.
; The <3 x i1> mask is taken directly as an argument (it arrives in w0-w2 in
; the expected output) and the passthru operand is the constant -1, which the
; expected output supplies through `csinv w0, w9, wzr, ne` instead of a csel
; against a register.
define i32 @extract_last_active_v3i32(<3 x i32> %a, <3 x i1> %c) {
; NEON-FIXED-LABEL: extract_last_active_v3i32:
; NEON-FIXED: // %bb.0:
; NEON-FIXED-NEXT: sub sp, sp, #16
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT: movi v1.2d, #0000000000000000
; NEON-FIXED-NEXT: adrp x8, .LCPI18_0
; NEON-FIXED-NEXT: mov x11, sp
; NEON-FIXED-NEXT: ldr d2, [x8, :lo12:.LCPI18_0]
; NEON-FIXED-NEXT: str q0, [sp]
; NEON-FIXED-NEXT: mov v1.h[0], w0
; NEON-FIXED-NEXT: mov v1.h[1], w1
; NEON-FIXED-NEXT: mov v1.h[2], w2
; NEON-FIXED-NEXT: and v2.8b, v1.8b, v2.8b
; NEON-FIXED-NEXT: fmov x8, d1
; NEON-FIXED-NEXT: umaxv h2, v2.4h
; NEON-FIXED-NEXT: lsr x9, x8, #32
; NEON-FIXED-NEXT: orr w9, w8, w9
; NEON-FIXED-NEXT: orr w8, w9, w8, lsr #16
; NEON-FIXED-NEXT: fmov w10, s2
; NEON-FIXED-NEXT: tst w8, #0x1
; NEON-FIXED-NEXT: bfi x11, x10, #2, #2
; NEON-FIXED-NEXT: ldr w9, [x11]
; NEON-FIXED-NEXT: csinv w0, w9, wzr, ne
; NEON-FIXED-NEXT: add sp, sp, #16
; NEON-FIXED-NEXT: ret
;
; SVE-FIXED-LABEL: extract_last_active_v3i32:
; SVE-FIXED: // %bb.0:
; SVE-FIXED-NEXT: sub sp, sp, #16
; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: movi v1.2d, #0000000000000000
; SVE-FIXED-NEXT: index z2.h, #0, #1
; SVE-FIXED-NEXT: mov x11, sp
; SVE-FIXED-NEXT: str q0, [sp]
; SVE-FIXED-NEXT: mov v1.h[0], w0
; SVE-FIXED-NEXT: mov v1.h[1], w1
; SVE-FIXED-NEXT: mov v1.h[2], w2
; SVE-FIXED-NEXT: and v2.8b, v1.8b, v2.8b
; SVE-FIXED-NEXT: fmov x8, d1
; SVE-FIXED-NEXT: umaxv h2, v2.4h
; SVE-FIXED-NEXT: lsr x9, x8, #32
; SVE-FIXED-NEXT: orr w9, w8, w9
; SVE-FIXED-NEXT: orr w8, w9, w8, lsr #16
; SVE-FIXED-NEXT: fmov w10, s2
; SVE-FIXED-NEXT: tst w8, #0x1
; SVE-FIXED-NEXT: bfi x11, x10, #2, #2
; SVE-FIXED-NEXT: ldr w9, [x11]
; SVE-FIXED-NEXT: csinv w0, w9, wzr, ne
; SVE-FIXED-NEXT: add sp, sp, #16
; SVE-FIXED-NEXT: ret
%res = call i32 @llvm.experimental.vector.extract.last.active.v3i32(<3 x i32> %a, <3 x i1> %c, i32 -1)
ret i32 %res
}
;; Scalable <vscale x 32 x i8> data with a <vscale x 32 x i1> mask, compiled
;; with attribute group #0. In the expected output the operands occupy two
;; vector registers (z0, z1) and two predicate registers (p0, p1): both data
;; halves are stored to the stack, a lastb on an index vector is computed for
;; each mask half, the chosen index is clamped (cmp / csel lo) before the ldrb,
;; and a final csel picks between the loaded byte and w0.
;; NOTE(review): presumably this exercises splitting of a type wider than one
;; SVE register, as the function name suggests - confirm against the legalizer.
define i8 @extract_last_active_split(<vscale x 32 x i8> %data, <vscale x 32 x i1> %mask, i8 %passthru) #0 {
; CHECK-LABEL: extract_last_active_split:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: ptest p1, p1.b
; CHECK-NEXT: rdvl x10, #1
; CHECK-NEXT: str z1, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: lastb w8, p1, z2.b
; CHECK-NEXT: lastb w9, p0, z2.b
; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT: add x8, x8, x10
; CHECK-NEXT: rdvl x10, #2
; CHECK-NEXT: csel x8, x8, x9, ne
; CHECK-NEXT: sub x9, x10, #1
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ldrb w8, [x9, x8]
; CHECK-NEXT: csel w0, w8, w0, ne
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> %data, <vscale x 32 x i1> %mask, i8 %passthru)
ret i8 %res
}
;; Attribute group #0, attached to the scalable-vector functions in this file:
;; marks them nounwind, adds "+sve" to their target features, and restricts
;; vscale to the range [1, 16].
attributes #0 = { nounwind "target-features"="+sve" vscale_range(1, 16) }