llvm-project/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll
Arseny Kapoulkine 5b98992fb9
[WebAssembly] Optimize convert_iKxN_u into convert_iKxN_s (#149609)
convert_iKxN_s is canonicalized into convert_iKxN_u when the argument is
known to have sign bit 0. This results in emitting Wasm opcodes that, on
some targets (like x86_64), are dramatically slower than signed versions
on major engines.

Similarly to X86, we now fix this up in isel when the instruction has
nonneg flag from canonicalization or if we know the source has zero sign
bit.

Fixes #149457.
2025-07-21 09:17:29 -07:00

145 lines
5.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+simd128 | FileCheck %s

; Codegen tests for vector integer-to-floating-point conversions whose source
; is a sub-vector of the argument and, in most cases, has lanes narrower than
; the result lanes. The expected output therefore contains integer extend
; instructions followed by a convert instruction. Everything is compiled with
; the simd128 feature enabled (see the llc invocation above).

; TODO: These tests should check that floating point conversions select
; extending instructions where possible

; All functions in this file are compiled for the 32-bit WebAssembly target.
target triple = "wasm32-unknown-unknown"
; Unsigned conversion (uitofp) of the low four i16 lanes of %x to <4 x float>.
; Expected lowering: zero-extend the low lanes to i32, then convert. The
; assertions expect the *signed* convert instruction here, even though the IR
; operation is unsigned; the converted value comes from a zero extension.
define <4 x float> @extend_to_float_low_i16x8_u(<8 x i16> %x) {
; CHECK-LABEL: extend_to_float_low_i16x8_u:
; CHECK: .functype extend_to_float_low_i16x8_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.extend_low_i16x8_u
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
  ; The mask selects only lanes 0-3 of %x, so the second operand is never read;
  ; poison is used as the placeholder instead of the deprecated undef.
  %low = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = uitofp <4 x i16> %low to <4 x float>
  ret <4 x float> %extended
}
; Unsigned conversion (uitofp) of the high four i16 lanes of %x to <4 x float>.
; Expected lowering: zero-extend the high lanes to i32, then convert. As in the
; low-half variant, the assertions expect the signed convert instruction after
; the zero extension.
define <4 x float> @extend_to_float_high_i16x8_u(<8 x i16> %x) {
; CHECK-LABEL: extend_to_float_high_i16x8_u:
; CHECK: .functype extend_to_float_high_i16x8_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.extend_high_i16x8_u
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
  ; The mask selects only lanes 4-7 of %x, so the second operand is never read;
  ; poison is used as the placeholder instead of the deprecated undef.
  %high = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = uitofp <4 x i16> %high to <4 x float>
  ret <4 x float> %extended
}
; Unsigned conversion (uitofp) of the low four i8 lanes of %x to <4 x float>.
; Note that the argument is an 8-lane <8 x i8> vector, despite the "i8x16" in
; the function name. Expected lowering: two successive zero extensions
; (i8 -> i16 -> i32) followed by the signed convert instruction.
define <4 x float> @extend_to_float_low_i8x16_u(<8 x i8> %x) {
; CHECK-LABEL: extend_to_float_low_i8x16_u:
; CHECK: .functype extend_to_float_low_i8x16_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extend_low_i8x16_u
; CHECK-NEXT: i32x4.extend_low_i16x8_u
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
  ; The mask selects only lanes 0-3 of %x, so the second operand is never read;
  ; poison is used as the placeholder instead of the deprecated undef.
  %low = shufflevector <8 x i8> %x, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = uitofp <4 x i8> %low to <4 x float>
  ret <4 x float> %extended
}
; Unsigned conversion (uitofp) of lanes 4-7 of %x to <4 x float>.
; The argument is an 8-lane <8 x i8> vector, so "high" here means lanes 4-7 of
; those eight lanes. The assertions expect a byte shuffle that moves bytes 4-7
; into the lowest four byte positions, then two low-half zero extensions
; (i8 -> i16 -> i32), then the signed convert instruction.
define <4 x float> @extend_to_float_high_i8x16_u(<8 x i8> %x) {
; CHECK-LABEL: extend_to_float_high_i8x16_u:
; CHECK: .functype extend_to_float_high_i8x16_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i16x8.extend_low_i8x16_u
; CHECK-NEXT: i32x4.extend_low_i16x8_u
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
  ; The mask selects only lanes 4-7 of %x, so the second operand is never read;
  ; poison is used as the placeholder instead of the deprecated undef.
  %high = shufflevector <8 x i8> %x, <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = uitofp <4 x i8> %high to <4 x float>
  ret <4 x float> %extended
}
; Signed conversion (sitofp) of the low four i16 lanes of %x to <4 x float>.
; Expected lowering: sign-extend the low lanes to i32, then the signed convert
; instruction.
define <4 x float> @extend_to_float_low_i16x8_s(<8 x i16> %x) {
; CHECK-LABEL: extend_to_float_low_i16x8_s:
; CHECK: .functype extend_to_float_low_i16x8_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.extend_low_i16x8_s
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
  ; The mask selects only lanes 0-3 of %x, so the second operand is never read;
  ; poison is used as the placeholder instead of the deprecated undef.
  %low = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = sitofp <4 x i16> %low to <4 x float>
  ret <4 x float> %extended
}
; Signed conversion (sitofp) of the high four i16 lanes of %x to <4 x float>.
; Expected lowering: sign-extend the high lanes to i32, then the signed convert
; instruction.
define <4 x float> @extend_to_float_high_i16x8_s(<8 x i16> %x) {
; CHECK-LABEL: extend_to_float_high_i16x8_s:
; CHECK: .functype extend_to_float_high_i16x8_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.extend_high_i16x8_s
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
  ; The mask selects only lanes 4-7 of %x, so the second operand is never read;
  ; poison is used as the placeholder instead of the deprecated undef.
  %high = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = sitofp <4 x i16> %high to <4 x float>
  ret <4 x float> %extended
}
; Signed conversion (sitofp) of the low four i8 lanes of %x to <4 x float>.
; Note that the argument is an 8-lane <8 x i8> vector, despite the "i8x16" in
; the function name. Expected lowering: two successive sign extensions
; (i8 -> i16 -> i32) followed by the signed convert instruction.
define <4 x float> @extend_to_float_low_i8x16_s(<8 x i8> %x) {
; CHECK-LABEL: extend_to_float_low_i8x16_s:
; CHECK: .functype extend_to_float_low_i8x16_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extend_low_i8x16_s
; CHECK-NEXT: i32x4.extend_low_i16x8_s
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
  ; The mask selects only lanes 0-3 of %x, so the second operand is never read;
  ; poison is used as the placeholder instead of the deprecated undef.
  %low = shufflevector <8 x i8> %x, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = sitofp <4 x i8> %low to <4 x float>
  ret <4 x float> %extended
}
; Signed conversion (sitofp) of lanes 4-7 of %x to <4 x float>.
; The argument is an 8-lane <8 x i8> vector, so "high" here means lanes 4-7 of
; those eight lanes. The assertions expect a byte shuffle that moves bytes 4-7
; into the lowest four byte positions, then two low-half sign extensions
; (i8 -> i16 -> i32), then the signed convert instruction.
define <4 x float> @extend_to_float_high_i8x16_s(<8 x i8> %x) {
; CHECK-LABEL: extend_to_float_high_i8x16_s:
; CHECK: .functype extend_to_float_high_i8x16_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i16x8.extend_low_i8x16_s
; CHECK-NEXT: i32x4.extend_low_i16x8_s
; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
  ; The mask selects only lanes 4-7 of %x, so the second operand is never read;
  ; poison is used as the placeholder instead of the deprecated undef.
  %high = shufflevector <8 x i8> %x, <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = sitofp <4 x i8> %high to <4 x float>
  ret <4 x float> %extended
}
; Unsigned conversion (uitofp) of the low two i32 lanes of %x to <2 x double>.
; Expected lowering: a single f64x2.convert_low_i32x4_u, with no separate
; extend. Unlike the tests whose source is zero-extended first, the unsigned
; convert is still expected here -- presumably because nothing establishes that
; the sign bit of the i32 lanes is zero.
define <2 x double> @extend_to_double_low_i32x4_u(<4 x i32> %x) {
; CHECK-LABEL: extend_to_double_low_i32x4_u:
; CHECK: .functype extend_to_double_low_i32x4_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: f64x2.convert_low_i32x4_u
; CHECK-NEXT: # fallthrough-return
  ; The mask selects only lanes 0-1 of %x, so the second operand is never read;
  ; poison is used as the placeholder instead of the deprecated undef.
  %low = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
  %extended = uitofp <2 x i32> %low to <2 x double>
  ret <2 x double> %extended
}
; Unsigned conversion (uitofp) of the low two i16 lanes of %x to <2 x double>.
; The argument is a 4-lane <4 x i16> vector. Expected lowering: zero-extend the
; low i16 lanes to i32, then f64x2.convert_low_i32x4_s. The signed convert is
; expected even though the IR operation is unsigned; the converted value comes
; from a zero extension.
define <2 x double> @extend_to_double_low_i16x4_u(<4 x i16> %x) {
; CHECK-LABEL: extend_to_double_low_i16x4_u:
; CHECK: .functype extend_to_double_low_i16x4_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.extend_low_i16x8_u
; CHECK-NEXT: f64x2.convert_low_i32x4_s
; CHECK-NEXT: # fallthrough-return
  ; The mask selects only lanes 0-1 of %x, so the second operand is never read;
  ; poison is used as the placeholder instead of the deprecated undef.
  %low = shufflevector <4 x i16> %x, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
  %extended = uitofp <2 x i16> %low to <2 x double>
  ret <2 x double> %extended
}