llvm-project/llvm/test/CodeGen/WebAssembly/vector-extract-last-active.ll
Benjamin Maxwell b91eb9b4e5
[SDAG] Implement missing legalization for ISD::VECTOR_FIND_LAST_ACTIVE (#180290)
This lowers the splitting as:
```
any_active(hi_mask)
  ? (find_last_active(hi_mask) + lo_mask.getVectorElementCount())
  : find_last_active(lo_mask)
```

It also trivially lowers `<1 x i1>` scalarization to returning zero, which
is a natural result of the splitting (and of the lack of a sentinel
"none-active" result value).

The lowerings likely can be improved. This patch is for completeness.

Should fix:
https://github.com/llvm/llvm-project/pull/178862#issuecomment-3862310334
Fixes #180212
2026-02-10 09:01:13 +00:00

150 lines
4.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s | FileCheck %s
; Codegen test for llvm.experimental.vector.extract.last.active on wasm32.
; llc is invoked with no extra feature flags; in the expected assembly below
; every vector operand shows up as a list of individual i32 parameters.
target triple = "wasm32-unknown-unknown"
; Extract the last active element of a <4 x i32> under a <4 x i1> mask, with
; -1 as the passthru (default) operand.
; Shape of the expected assembly:
;  - the function takes eight i32 parameters; locals 0-3 are stored as the data
;    lanes and locals 4-7 are tested (masked with 1) as the mask lanes;
;  - the four data lanes are stored at offsets 0/4/8/12 of a 16-byte stack slot;
;  - the lane index is chosen with selects: 3-or-2 from mask lane 3, 1-or-0 from
;    mask lane 1, taking the upper pair when mask lane 2 or 3 is set;
;  - the index is shifted left by 2, or'ed into the slot address, and loaded;
;  - a final select yields -1 instead of the loaded value when the or of all
;    four mask lanes has its low bit clear.
define i32 @extract_last_active_v4i32(<4 x i32> %a, <4 x i1> %c) {
; CHECK-LABEL: extract_last_active_v4i32:
; CHECK: .functype extract_last_active_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get __stack_pointer
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.sub
; CHECK-NEXT: local.tee 8
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i32.store 12
; CHECK-NEXT: local.get 8
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i32.store 8
; CHECK-NEXT: local.get 8
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.store 4
; CHECK-NEXT: local.get 8
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.store 0
; CHECK-NEXT: local.get 8
; CHECK-NEXT: i32.const 3
; CHECK-NEXT: i32.const 2
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.select
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.select
; CHECK-NEXT: local.get 6
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.select
; CHECK-NEXT: i32.const 2
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.load 0
; CHECK-NEXT: i32.const -1
; CHECK-NEXT: local.get 4
; CHECK-NEXT: local.get 6
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 5
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.select
; CHECK-NEXT: # fallthrough-return
%res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %a, <4 x i1> %c, i32 -1)
ret i32 %res
}
; Same <4 x i32> / <4 x i1> extraction as the -1 passthru case, but the
; passthru operand is poison.
; The expected assembly stores the four data lanes to a 16-byte stack slot,
; computes the lane index with selects on the mask lanes (locals 5, 6 and 7),
; and loads the element. It ends right after the load: there is no trailing
; select on the "any mask lane set" condition, and mask lane 0 (local 4) is
; never read.
define i32 @extract_last_active_v4i32_no_default(<4 x i32> %a, <4 x i1> %c) {
; CHECK-LABEL: extract_last_active_v4i32_no_default:
; CHECK: .functype extract_last_active_v4i32_no_default (i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get __stack_pointer
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.sub
; CHECK-NEXT: local.tee 8
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i32.store 12
; CHECK-NEXT: local.get 8
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i32.store 8
; CHECK-NEXT: local.get 8
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.store 4
; CHECK-NEXT: local.get 8
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.store 0
; CHECK-NEXT: local.get 8
; CHECK-NEXT: i32.const 3
; CHECK-NEXT: i32.const 2
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.select
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.select
; CHECK-NEXT: local.get 6
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.select
; CHECK-NEXT: i32.const 2
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.load 0
; CHECK-NEXT: # fallthrough-return
%res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %a, <4 x i1> %c, i32 poison)
ret i32 %res
}
; Test v2i32 - smaller vector.
; Extracts the last active element of a <2 x i32> under a <2 x i1> mask, with
; -1 as the passthru operand.
; In the expected assembly the function takes four i32 parameters: locals 0-1
; are stored as the data lanes (at offsets 8 and 12 of a 16-byte stack slot) and
; locals 2-3 are tested as the mask lanes. The load address is (slot + 8) or'ed
; with 4 when mask lane 1 is set (0 otherwise), and a final select yields -1
; when the or of both mask lanes has its low bit clear.
define i32 @extract_last_active_v2i32(<2 x i32> %a, <2 x i1> %c) {
; CHECK-LABEL: extract_last_active_v2i32:
; CHECK: .functype extract_last_active_v2i32 (i32, i32, i32, i32) -> (i32)
; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get __stack_pointer
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.sub
; CHECK-NEXT: local.tee 4
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.store 12
; CHECK-NEXT: local.get 4
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.store 8
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.select
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.load 0
; CHECK-NEXT: i32.const -1
; CHECK-NEXT: local.get 2
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.select
; CHECK-NEXT: # fallthrough-return
%res = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> %a, <2 x i1> %c, i32 -1)
ret i32 %res
}