
convert_iKxN_s is canonicalized into convert_iKxN_u when the argument is known to have a zero sign bit. This results in emitting Wasm opcodes that, on some targets (like x86_64), are dramatically slower than their signed counterparts on major engines. As on X86, we now fix this up in isel when the instruction carries the nonneg flag from canonicalization or when the source is known to have a zero sign bit. Fixes #149457.
145 lines
5.8 KiB
LLVM
145 lines
5.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+simd128 | FileCheck %s

; Covers integer-to-floating-point vector conversions whose source lanes are
; narrower than the result lanes, compiled for wasm32 with simd128 enabled.

; TODO: These tests should check that floating point conversions select
; extending instructions where possible

target triple = "wasm32-unknown-unknown"
|
|
|
|
; Unsigned conversion (uitofp) of the low four i16 lanes of %x to float.
; The expected output zero-extends the lanes to i32 and then uses the signed
; f32x4.convert_i32x4_s: after a zero-extension the sign bit is clear, so the
; signed convert yields the same value as the unsigned one.
define <4 x float> @extend_to_float_low_i16x8_u(<8 x i16> %x) {
; CHECK-LABEL: extend_to_float_low_i16x8_u:
; CHECK:         .functype extend_to_float_low_i16x8_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    f32x4.convert_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = uitofp <4 x i16> %low to <4 x float>
  ret <4 x float> %extended
}
|
|
|
|
; Unsigned conversion (uitofp) of the high four i16 lanes (indices 4-7) of %x
; to float. The expected output zero-extends the high half to i32 and then
; uses the signed f32x4.convert_i32x4_s, which is equivalent here because the
; zero-extended lanes have a clear sign bit.
define <4 x float> @extend_to_float_high_i16x8_u(<8 x i16> %x) {
; CHECK-LABEL: extend_to_float_high_i16x8_u:
; CHECK:         .functype extend_to_float_high_i16x8_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_high_i16x8_u
; CHECK-NEXT:    f32x4.convert_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = uitofp <4 x i16> %high to <4 x float>
  ret <4 x float> %extended
}
|
|
|
|
; Unsigned conversion (uitofp) of the low four i8 lanes of %x to float.
; The expected output widens in two unsigned steps (i8 -> i16 -> i32) and then
; uses the signed f32x4.convert_i32x4_s, which is equivalent because the
; zero-extended lanes have a clear sign bit.
; NOTE(review): the name says i8x16 but the parameter is <8 x i8>; confirm
; this is intentional.
define <4 x float> @extend_to_float_low_i8x16_u(<8 x i8> %x) {
; CHECK-LABEL: extend_to_float_low_i8x16_u:
; CHECK:         .functype extend_to_float_low_i8x16_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_u
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    f32x4.convert_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = uitofp <4 x i8> %low to <4 x float>
  ret <4 x float> %extended
}
|
|
|
|
; Unsigned conversion (uitofp) of i8 lanes 4-7 of %x to float.
; The expected output first shuffles bytes 4-7 into the lowest four byte
; positions, widens them in two unsigned steps (i8 -> i16 -> i32), and then
; uses the signed f32x4.convert_i32x4_s, which is equivalent because the
; zero-extended lanes have a clear sign bit.
; NOTE(review): the name says i8x16 but the parameter is <8 x i8>; confirm
; this is intentional.
define <4 x float> @extend_to_float_high_i8x16_u(<8 x i8> %x) {
; CHECK-LABEL: extend_to_float_high_i8x16_u:
; CHECK:         .functype extend_to_float_high_i8x16_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_u
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    f32x4.convert_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = uitofp <4 x i8> %high to <4 x float>
  ret <4 x float> %extended
}
|
|
|
|
; Signed conversion (sitofp) of the low four i16 lanes of %x to float.
; The expected output sign-extends the lanes to i32 and then applies the
; signed f32x4.convert_i32x4_s.
define <4 x float> @extend_to_float_low_i16x8_s(<8 x i16> %x) {
; CHECK-LABEL: extend_to_float_low_i16x8_s:
; CHECK:         .functype extend_to_float_low_i16x8_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    f32x4.convert_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = sitofp <4 x i16> %low to <4 x float>
  ret <4 x float> %extended
}
|
|
|
|
; Signed conversion (sitofp) of the high four i16 lanes (indices 4-7) of %x to
; float. The expected output sign-extends the high half to i32 and then
; applies the signed f32x4.convert_i32x4_s.
define <4 x float> @extend_to_float_high_i16x8_s(<8 x i16> %x) {
; CHECK-LABEL: extend_to_float_high_i16x8_s:
; CHECK:         .functype extend_to_float_high_i16x8_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_high_i16x8_s
; CHECK-NEXT:    f32x4.convert_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = sitofp <4 x i16> %high to <4 x float>
  ret <4 x float> %extended
}
|
|
|
|
; Signed conversion (sitofp) of the low four i8 lanes of %x to float.
; The expected output widens in two signed steps (i8 -> i16 -> i32) and then
; applies the signed f32x4.convert_i32x4_s.
; NOTE(review): the name says i8x16 but the parameter is <8 x i8>; confirm
; this is intentional.
define <4 x float> @extend_to_float_low_i8x16_s(<8 x i8> %x) {
; CHECK-LABEL: extend_to_float_low_i8x16_s:
; CHECK:         .functype extend_to_float_low_i8x16_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    f32x4.convert_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = sitofp <4 x i8> %low to <4 x float>
  ret <4 x float> %extended
}
|
|
|
|
; Signed conversion (sitofp) of i8 lanes 4-7 of %x to float.
; The expected output first shuffles bytes 4-7 into the lowest four byte
; positions, widens them in two signed steps (i8 -> i16 -> i32), and then
; applies the signed f32x4.convert_i32x4_s.
; NOTE(review): the name says i8x16 but the parameter is <8 x i8>; confirm
; this is intentional.
define <4 x float> @extend_to_float_high_i8x16_s(<8 x i8> %x) {
; CHECK-LABEL: extend_to_float_high_i8x16_s:
; CHECK:         .functype extend_to_float_high_i8x16_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    f32x4.convert_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = sitofp <4 x i8> %high to <4 x float>
  ret <4 x float> %extended
}
|
|
|
|
; Unsigned conversion (uitofp) of the low two i32 lanes of %x to double.
; The expected output is a single f64x2.convert_low_i32x4_u. The unsigned
; opcode is kept here, presumably because nothing is known about the sign bit
; of the full-width i32 lanes (confirm against the isel change).
define <2 x double> @extend_to_double_low_i32x4_u(<4 x i32> %x) {
; CHECK-LABEL: extend_to_double_low_i32x4_u:
; CHECK:         .functype extend_to_double_low_i32x4_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f64x2.convert_low_i32x4_u
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %extended = uitofp <2 x i32> %low to <2 x double>
  ret <2 x double> %extended
}
|
|
|
|
; Unsigned conversion (uitofp) of the low two i16 lanes of %x to double.
; The expected output zero-extends the lanes to i32 and then uses the signed
; f64x2.convert_low_i32x4_s, which is equivalent because the zero-extended
; lanes have a clear sign bit.
define <2 x double> @extend_to_double_low_i16x4_u(<4 x i16> %x) {
; CHECK-LABEL: extend_to_double_low_i16x4_u:
; CHECK:         .functype extend_to_double_low_i16x4_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    f64x2.convert_low_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <4 x i16> %x, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
  %extended = uitofp <2 x i16> %low to <2 x double>
  ret <2 x double> %extended
}
|