
The vector shift operation in WebAssembly takes an i32 shift amount, while LLVM IR requires both operands of a binary operator to have the same type. When the shift amount operand is splatted in a different block, the splat source is not exported and the vector shift is unrolled into scalar shifts. This patch enables the vector shift lowering to identify the splat source value from the other block and to generate the expected WebAssembly bytecode. Reviewed By: tlively Differential Revision: https://reviews.llvm.org/D158399
105 lines
3.3 KiB
LLVM
105 lines
3.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test that SIMD shifts are lowered to vector shift instructions even when
; the splatted shift amount is defined outside the block that uses it.

target triple = "wasm32-unknown-unknown"

; shl_loop: the splat of %shift is built once in %entry, while the vector shl
; that consumes it lives in the %body loop. The assertions below expect a
; single i8x16.shl per iteration whose shift amount is read from local 1,
; instead of the shift being unrolled into scalar shifts.
define void @shl_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_loop:
; CHECK:         .functype shl_loop (i32, i32, i32) -> ()
; CHECK-NEXT:  # %bb.0: # %entry
; CHECK-NEXT:  .LBB0_1: # %body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    loop # label0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0:p2align=0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shl
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.set 0
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.tee 2
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    br_if 0 # 0: up to label0
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    end_loop
; CHECK-NEXT:    # fallthrough-return
entry:
  ; Broadcast the scalar shift amount to all 16 lanes, outside the loop.
  %t1 = insertelement <16 x i8> undef, i8 %shift, i32 0
  %vshift = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
  br label %body
body:
  %out = phi ptr [%a, %entry], [%b, %body]
  %i = phi i32 [0, %entry], [%next, %body]
  %v = load <16 x i8>, ptr %out, align 1
  ; Vector shift whose splatted amount was defined in %entry.
  %r = shl <16 x i8> %v, %vshift
  ; The result is stored 16 bytes past the address that was loaded from.
  %b = getelementptr inbounds i8, ptr %out, i32 16
  store <16 x i8> %r, ptr %b
  %next = add i32 %i, 1
  ; The back edge is taken when %next equals %count.
  %i.cmp = icmp eq i32 %next, %count
  br i1 %i.cmp, label %body, label %exit
exit:
  ret void
}

; Test that SIMD shifts can be lowered correctly when the shift value
; is a phi inside the loop body.

; shl_phi_loop: the scalar shift amount is the loop-carried phi %t1, which is
; splatted inside %body and then masked with 1 for the next iteration. The
; assertions below expect a single i8x16.shl per iteration whose shift amount
; is read from local 1, followed by the i32.and that updates that local.
define void @shl_phi_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_phi_loop:
; CHECK:         .functype shl_phi_loop (i32, i32, i32) -> ()
; CHECK-NEXT:  # %bb.0: # %entry
; CHECK-NEXT:  .LBB1_1: # %body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    loop # label1:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0:p2align=0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shl
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    local.set 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.set 0
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.tee 2
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    br_if 0 # 0: up to label1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    end_loop
; CHECK-NEXT:    # fallthrough-return
entry:
  br label %body
body:
  %out = phi ptr [%a, %entry], [%b, %body]
  %i = phi i32 [0, %entry], [%next, %body]
  ; Loop-carried scalar shift amount: %shift on entry, %sand afterwards.
  %t1 = phi i8 [%shift, %entry], [%sand, %body]
  ; Broadcast the phi value to all 16 lanes inside the loop body.
  %t2 = insertelement <16 x i8> undef, i8 %t1, i32 0
  %vshift = shufflevector <16 x i8> %t2, <16 x i8> undef, <16 x i32> zeroinitializer
  %v = load <16 x i8>, ptr %out, align 1
  %r = shl <16 x i8> %v, %vshift
  ; The result is stored 16 bytes past the address that was loaded from.
  %b = getelementptr inbounds i8, ptr %out, i32 16
  store <16 x i8> %r, ptr %b
  ; Shift amount for the next iteration.
  %sand = and i8 %t1, 1
  %next = add i32 %i, 1
  ; The back edge is taken when %next equals %count.
  %i.cmp = icmp eq i32 %next, %count
  br i1 %i.cmp, label %body, label %exit
exit:
  ret void
}