
lowerBuildVectorAsBroadcast will not broadcast splat constants in all cases, resulting in many situations where a full-width vector load that has failed to fold, but is loading splat constant values, could use a broadcast load instruction just as cheaply and save constant-pool space. This is an updated commit of ab4b924832ce26c21b88d7f82fcf4992ea8906bb, after it was reverted in 78de45fd4a902066617fcc9bb88efee11f743bc6.
50 lines
2.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512

; The purpose of this test is to ensure that vpackus* is not used for the
; umin+trunc combination, since vpackus* interprets its input as signed.
define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
; AVX-LABEL: usat_trunc_wb_256:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX-NEXT: vpminuw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpminuw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: usat_trunc_wb_256:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovuswb %ymm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  ; Clamp every i16 lane to at most 255 (unsigned min expressed as
  ; icmp ult + select), then truncate to i8: together this is an
  ; unsigned saturating truncate of <16 x i16> to <16 x i8>.
  %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x6 = trunc <16 x i16> %x5 to <16 x i8>
  ret <16 x i8> %x6
}
define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
; AVX-LABEL: usat_trunc_dw_256:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [65535,65535,65535,65535]
; AVX-NEXT: vpminud %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpminud %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: usat_trunc_dw_256:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovusdw %ymm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  ; Clamp every i32 lane to at most 65535 (unsigned min expressed as
  ; icmp ult + select), then truncate to i16: together this is an
  ; unsigned saturating truncate of <8 x i32> to <8 x i16>.
  %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %x6 = trunc <8 x i32> %x5 to <8 x i16>
  ret <8 x i16> %x6
}