This lowers the splitting as:
```
any_active(hi_mask) ? (find_last_active(hi_mask) + lo_mask.getVectorElementCount()) : find_last_active(lo_mask)
```
It also trivially lowers `<1 x i1>` scalarization to returning zero, which is a natural result of the splitting (and of the lack of a sentinel "none-active" result value). The lowerings can likely be improved; this patch is for completeness. Should fix: https://github.com/llvm/llvm-project/pull/178862#issuecomment-3862310334 Fixes #180212
150 lines
4.6 KiB
LLVM
150 lines
4.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
|
; RUN: llc < %s | FileCheck %s
|
|
|
|
target triple = "wasm32-unknown-unknown"
|
|
|
|
define i32 @extract_last_active_v4i32(<4 x i32> %a, <4 x i1> %c) {
|
|
; CHECK-LABEL: extract_last_active_v4i32:
|
|
; CHECK: .functype extract_last_active_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
|
|
; CHECK-NEXT: .local i32
|
|
; CHECK-NEXT: # %bb.0:
|
|
; CHECK-NEXT: global.get __stack_pointer
|
|
; CHECK-NEXT: i32.const 16
|
|
; CHECK-NEXT: i32.sub
|
|
; CHECK-NEXT: local.tee 8
|
|
; CHECK-NEXT: local.get 3
|
|
; CHECK-NEXT: i32.store 12
|
|
; CHECK-NEXT: local.get 8
|
|
; CHECK-NEXT: local.get 2
|
|
; CHECK-NEXT: i32.store 8
|
|
; CHECK-NEXT: local.get 8
|
|
; CHECK-NEXT: local.get 1
|
|
; CHECK-NEXT: i32.store 4
|
|
; CHECK-NEXT: local.get 8
|
|
; CHECK-NEXT: local.get 0
|
|
; CHECK-NEXT: i32.store 0
|
|
; CHECK-NEXT: local.get 8
|
|
; CHECK-NEXT: i32.const 3
|
|
; CHECK-NEXT: i32.const 2
|
|
; CHECK-NEXT: local.get 7
|
|
; CHECK-NEXT: i32.const 1
|
|
; CHECK-NEXT: i32.and
|
|
; CHECK-NEXT: i32.select
|
|
; CHECK-NEXT: i32.const 1
|
|
; CHECK-NEXT: i32.const 0
|
|
; CHECK-NEXT: local.get 5
|
|
; CHECK-NEXT: i32.const 1
|
|
; CHECK-NEXT: i32.and
|
|
; CHECK-NEXT: i32.select
|
|
; CHECK-NEXT: local.get 6
|
|
; CHECK-NEXT: local.get 7
|
|
; CHECK-NEXT: i32.or
|
|
; CHECK-NEXT: i32.const 1
|
|
; CHECK-NEXT: i32.and
|
|
; CHECK-NEXT: i32.select
|
|
; CHECK-NEXT: i32.const 2
|
|
; CHECK-NEXT: i32.shl
|
|
; CHECK-NEXT: i32.or
|
|
; CHECK-NEXT: i32.load 0
|
|
; CHECK-NEXT: i32.const -1
|
|
; CHECK-NEXT: local.get 4
|
|
; CHECK-NEXT: local.get 6
|
|
; CHECK-NEXT: i32.or
|
|
; CHECK-NEXT: local.get 5
|
|
; CHECK-NEXT: local.get 7
|
|
; CHECK-NEXT: i32.or
|
|
; CHECK-NEXT: i32.or
|
|
; CHECK-NEXT: i32.const 1
|
|
; CHECK-NEXT: i32.and
|
|
; CHECK-NEXT: i32.select
|
|
; CHECK-NEXT: # fallthrough-return
|
|
%res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %a, <4 x i1> %c, i32 -1)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @extract_last_active_v4i32_no_default(<4 x i32> %a, <4 x i1> %c) {
|
|
; CHECK-LABEL: extract_last_active_v4i32_no_default:
|
|
; CHECK: .functype extract_last_active_v4i32_no_default (i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
|
|
; CHECK-NEXT: .local i32
|
|
; CHECK-NEXT: # %bb.0:
|
|
; CHECK-NEXT: global.get __stack_pointer
|
|
; CHECK-NEXT: i32.const 16
|
|
; CHECK-NEXT: i32.sub
|
|
; CHECK-NEXT: local.tee 8
|
|
; CHECK-NEXT: local.get 3
|
|
; CHECK-NEXT: i32.store 12
|
|
; CHECK-NEXT: local.get 8
|
|
; CHECK-NEXT: local.get 2
|
|
; CHECK-NEXT: i32.store 8
|
|
; CHECK-NEXT: local.get 8
|
|
; CHECK-NEXT: local.get 1
|
|
; CHECK-NEXT: i32.store 4
|
|
; CHECK-NEXT: local.get 8
|
|
; CHECK-NEXT: local.get 0
|
|
; CHECK-NEXT: i32.store 0
|
|
; CHECK-NEXT: local.get 8
|
|
; CHECK-NEXT: i32.const 3
|
|
; CHECK-NEXT: i32.const 2
|
|
; CHECK-NEXT: local.get 7
|
|
; CHECK-NEXT: i32.const 1
|
|
; CHECK-NEXT: i32.and
|
|
; CHECK-NEXT: i32.select
|
|
; CHECK-NEXT: i32.const 1
|
|
; CHECK-NEXT: i32.const 0
|
|
; CHECK-NEXT: local.get 5
|
|
; CHECK-NEXT: i32.const 1
|
|
; CHECK-NEXT: i32.and
|
|
; CHECK-NEXT: i32.select
|
|
; CHECK-NEXT: local.get 6
|
|
; CHECK-NEXT: local.get 7
|
|
; CHECK-NEXT: i32.or
|
|
; CHECK-NEXT: i32.const 1
|
|
; CHECK-NEXT: i32.and
|
|
; CHECK-NEXT: i32.select
|
|
; CHECK-NEXT: i32.const 2
|
|
; CHECK-NEXT: i32.shl
|
|
; CHECK-NEXT: i32.or
|
|
; CHECK-NEXT: i32.load 0
|
|
; CHECK-NEXT: # fallthrough-return
|
|
%res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %a, <4 x i1> %c, i32 poison)
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test v2i32 - smaller vector.
|
|
define i32 @extract_last_active_v2i32(<2 x i32> %a, <2 x i1> %c) {
|
|
; CHECK-LABEL: extract_last_active_v2i32:
|
|
; CHECK: .functype extract_last_active_v2i32 (i32, i32, i32, i32) -> (i32)
|
|
; CHECK-NEXT: .local i32
|
|
; CHECK-NEXT: # %bb.0:
|
|
; CHECK-NEXT: global.get __stack_pointer
|
|
; CHECK-NEXT: i32.const 16
|
|
; CHECK-NEXT: i32.sub
|
|
; CHECK-NEXT: local.tee 4
|
|
; CHECK-NEXT: local.get 1
|
|
; CHECK-NEXT: i32.store 12
|
|
; CHECK-NEXT: local.get 4
|
|
; CHECK-NEXT: local.get 0
|
|
; CHECK-NEXT: i32.store 8
|
|
; CHECK-NEXT: local.get 4
|
|
; CHECK-NEXT: i32.const 8
|
|
; CHECK-NEXT: i32.add
|
|
; CHECK-NEXT: i32.const 4
|
|
; CHECK-NEXT: i32.const 0
|
|
; CHECK-NEXT: local.get 3
|
|
; CHECK-NEXT: i32.const 1
|
|
; CHECK-NEXT: i32.and
|
|
; CHECK-NEXT: i32.select
|
|
; CHECK-NEXT: i32.or
|
|
; CHECK-NEXT: i32.load 0
|
|
; CHECK-NEXT: i32.const -1
|
|
; CHECK-NEXT: local.get 2
|
|
; CHECK-NEXT: local.get 3
|
|
; CHECK-NEXT: i32.or
|
|
; CHECK-NEXT: i32.const 1
|
|
; CHECK-NEXT: i32.and
|
|
; CHECK-NEXT: i32.select
|
|
; CHECK-NEXT: # fallthrough-return
|
|
%res = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> %a, <2 x i1> %c, i32 -1)
|
|
ret i32 %res
|
|
}
|