llvm-project/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
Yolanda Chen 291101aa8e [WebAssembly] Optimize vector shift using a splat value from outside block
The vector shift operation in WebAssembly uses an i32 shift amount type, while
LLVM IR requires a binary operator to use operands of the same type. When the
shift amount operand is splatted in a different block, the splat source will
not be exported and the vector shift will be unrolled into scalar shifts. This
patch enables the vector shift to identify the splat source value from the other
block, and to generate the expected WebAssembly bytecode when lowering.

Reviewed By: tlively

Differential Revision: https://reviews.llvm.org/D158399
2023-08-25 08:13:27 -07:00

105 lines
3.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
; The command above compiles this file with the simd128 feature enabled and
; the machine verifier turned on, then matches the emitted assembly against
; the autogenerated assertions embedded in each function.
;
; Test that SIMD shifts can be lowered correctly even when the splatted shift
; amount is defined in a different basic block from the shift that uses it.
target triple = "wasm32-unknown-unknown"
; In @shl_loop the <16 x i8> splat of %shift is built in %entry, while the
; vector shl that consumes it sits in the %body loop. The assertions below
; expect one i8x16.shl whose shift amount is read directly with local.get 1,
; and no per-lane scalar shifts.
define void @shl_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_loop:
; CHECK: .functype shl_loop (i32, i32, i32) -> ()
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: .LBB0_1: # %body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: loop # label0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shl
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.set 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i32.const -1
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.tee 2
; CHECK-NEXT: i32.eqz
; CHECK-NEXT: br_if 0 # 0: up to label0
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: end_loop
; CHECK-NEXT: # fallthrough-return
entry:
; Splat %shift across all 16 lanes: insert it into lane 0, then broadcast
; lane 0 with an all-zero shuffle mask. This happens outside the loop block.
%t1 = insertelement <16 x i8> undef, i8 %shift, i32 0
%vshift = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
br label %body
body:
; %out starts at %a and becomes %b (16 bytes further) on each back-edge;
; %i starts at 0 and is incremented once per iteration.
%out = phi ptr [%a, %entry], [%b, %body]
%i = phi i32 [0, %entry], [%next, %body]
; Byte-aligned 16-byte load, shifted left lane-wise by the splat from %entry.
%v = load <16 x i8>, ptr %out, align 1
%r = shl <16 x i8> %v, %vshift
; The result is stored 16 bytes past the load address, which is also the
; address the next iteration loads from.
%b = getelementptr inbounds i8, ptr %out, i32 16
store <16 x i8> %r, ptr %b
%next = add i32 %i, 1
; NOTE(review): the back-edge is taken while %next == %count (icmp eq), not
; while they differ. This does not affect the shift lowering under test, but
; confirm it is the intended loop condition before reusing this pattern.
%i.cmp = icmp eq i32 %next, %count
br i1 %i.cmp, label %body, label %exit
exit:
ret void
}
; Test that SIMD shifts can be lowered correctly when the shift value
; is a phi inside the loop body.
; Here the scalar shift amount %t1 is loop-carried (%shift on entry, then
; %t1 & 1 on every back-edge) and the splat is rebuilt inside %body on each
; iteration. The assertions below expect a single i8x16.shl fed by
; local.get 1, followed by the i32.const 1 / i32.and update of local 1.
define void @shl_phi_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_phi_loop:
; CHECK: .functype shl_phi_loop (i32, i32, i32) -> ()
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: .LBB1_1: # %body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: loop # label1:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shl
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.set 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.set 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i32.const -1
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.tee 2
; CHECK-NEXT: i32.eqz
; CHECK-NEXT: br_if 0 # 0: up to label1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: end_loop
; CHECK-NEXT: # fallthrough-return
entry:
br label %body
body:
; %out starts at %a and becomes %b (16 bytes further) on each back-edge;
; %i starts at 0 and is incremented once per iteration.
%out = phi ptr [%a, %entry], [%b, %body]
%i = phi i32 [0, %entry], [%next, %body]
; Loop-carried scalar shift amount: %shift on the first iteration, %sand after.
%t1 = phi i8 [%shift, %entry], [%sand, %body]
; Splat %t1 across all 16 lanes: insert it into lane 0, then broadcast lane 0
; with an all-zero shuffle mask.
%t2 = insertelement <16 x i8> undef, i8 %t1, i32 0
%vshift = shufflevector <16 x i8> %t2, <16 x i8> undef, <16 x i32> zeroinitializer
; Byte-aligned 16-byte load, shifted left lane-wise by the splat of the phi.
%v = load <16 x i8>, ptr %out, align 1
%r = shl <16 x i8> %v, %vshift
; The result is stored 16 bytes past the load address, which is also the
; address the next iteration loads from.
%b = getelementptr inbounds i8, ptr %out, i32 16
store <16 x i8> %r, ptr %b
; Next iteration's shift amount keeps only the low bit of the current one.
%sand = and i8 %t1, 1
%next = add i32 %i, 1
; NOTE(review): the back-edge is taken while %next == %count (icmp eq), not
; while they differ. This does not affect the shift lowering under test, but
; confirm it is the intended loop condition before reusing this pattern.
%i.cmp = icmp eq i32 %next, %count
br i1 %i.cmp, label %body, label %exit
exit:
ret void
}