
Constants in a BUILD_VECTOR may be down-cast to a smaller value that fits in LaneBits, i.e., the bit width of the elements in the vector. This cast did not handle the constant 2^N: it was cast to -2^N, which still does not fit in LaneBits after casting and therefore triggers an assertion failure during later legalization. 2^N should instead be cast to 0, and this patch implements that behavior. The patch also includes a test that covers the fix.

This patch fixes [issue 61780](https://github.com/llvm/llvm-project/issues/61780)

Related patch: https://reviews.llvm.org/D108669

Reviewed By: tlively

Differential Revision: https://reviews.llvm.org/D147208
47 lines
1.4 KiB
LLVM
47 lines
1.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=wasm32 -mattr=+simd128 -opaque-pointers | FileCheck %s

; Regression test for https://github.com/llvm/llvm-project/issues/61780
; (fix: https://reviews.llvm.org/D147208).
;
; @f takes two pointers:
;   %0  - source of a <2 x i32> load
;   %pr - destination of a <2 x i8> store
;
; It compares the loaded lanes (unsigned >) against <0, 1>, zero-extends the
; <2 x i1> mask to <2 x i8>, arithmetic-shifts the constant vector <16, 16>
; right by that mask, squares the result lane-wise and stores it to %pr.
;
; The autogenerated assertions inside the function pin the expected wasm32
; +simd128 output: the vector compare stays a single i32x4.gt_u, while the
; shift and multiply are performed on scalars for lanes 0 and 1 and written
; back with i8x16.replace_lane before the final v128.store16_lane.
; Presumably the test's main purpose is that llc compiles this function at
; all (the linked issue reports an assertion during legalization).

define void @f(ptr %0, ptr %pr) {
; CHECK-LABEL: f:
; CHECK:         .functype f (i32, i32) -> ()
; CHECK-NEXT:    .local v128
; CHECK-NEXT:  # %bb.0: # %BB
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    v128.const 0, 1, 0, 0
; CHECK-NEXT:    i32x4.gt_u
; CHECK-NEXT:    local.tee 2
; CHECK-NEXT:    i32x4.extract_lane 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.shr_u
; CHECK-NEXT:    local.tee 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.mul
; CHECK-NEXT:    i8x16.replace_lane 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i32x4.extract_lane 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.shr_u
; CHECK-NEXT:    local.tee 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.mul
; CHECK-NEXT:    i8x16.replace_lane 1
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
BB:
  %v0 = load <2 x i32>, ptr %0
  %v1 = icmp ugt <2 x i32> %v0, <i32 0, i32 1>
  %v2 = zext <2 x i1> %v1 to <2 x i8>
  %v3 = ashr <2 x i8> <i8 16, i8 16>, %v2
  %v4 = mul <2 x i8> %v3, %v3
  store <2 x i8> %v4, ptr %pr
  ret void
}