
; NOTE: Test updates were performed using:
;   https://gist.github.com/nikic/98357b71fd67756b0f064c9517b62a34
; These are only the test updates where the test passed without further
; modification (which is almost all of them, as the backend is largely
; pointer-type agnostic).
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX

; Combine tests involving SSE41 target shuffles (BLEND,INSERTPS,MOVZX)

declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)

define <16 x i8> @combine_vpshufb_as_movzx(<16 x i8> %a0) {
; SSE-LABEL: combine_vpshufb_as_movzx:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vpshufb_as_movzx:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT:    retq
  ; A pshufb mask that keeps bytes 0-3, zeroes 4-7/12-15 and leaves 8-11
  ; undef is equivalent to zero-extending the low two i32 lanes, so the
  ; shuffle combiner should emit (v)pmovzxdq instead of a pshufb.
  %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 undef, i8 undef, i8 undef, i8 undef, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %res0
}
define <16 x i8> @PR50049(ptr %p1, ptr %p2) {
; SSE-LABEL: PR50049:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa (%rdi), %xmm2
; SSE-NEXT:    movdqa 16(%rdi), %xmm0
; SSE-NEXT:    movdqa 32(%rdi), %xmm1
; SSE-NEXT:    movdqa (%rsi), %xmm4
; SSE-NEXT:    movdqa 16(%rsi), %xmm5
; SSE-NEXT:    movdqa 32(%rsi), %xmm3
; SSE-NEXT:    movdqa {{.*#+}} xmm6 = <128,128,128,128,128,128,2,5,8,11,14,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm6, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm7 = <0,3,6,9,12,15,128,128,128,128,128,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm7, %xmm2
; SSE-NEXT:    por %xmm0, %xmm2
; SSE-NEXT:    pshufb %xmm6, %xmm5
; SSE-NEXT:    pshufb %xmm7, %xmm4
; SSE-NEXT:    por %xmm5, %xmm4
; SSE-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
; SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; SSE-NEXT:    pmullw %xmm5, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
; SSE-NEXT:    pand %xmm5, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm6 = <8,u,9,u,10,u,128,u,128,u,128,u,128,u,128,u>
; SSE-NEXT:    pshufb %xmm6, %xmm4
; SSE-NEXT:    movdqa {{.*#+}} xmm7 = <128,u,128,u,128,u,1,u,4,u,7,u,10,u,13,u>
; SSE-NEXT:    pshufb %xmm7, %xmm3
; SSE-NEXT:    por %xmm4, %xmm3
; SSE-NEXT:    pshufb %xmm6, %xmm2
; SSE-NEXT:    pshufb %xmm7, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    pmullw %xmm3, %xmm1
; SSE-NEXT:    pand %xmm5, %xmm1
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    retq
  ; Each shuffle gathers every third byte (stride-3 deinterleave) of a
  ; 48-byte load; the two gathered <16 x i8> vectors are then multiplied.
  ; NOTE(review): only SSE CHECK lines are visible in this chunk — the AVX
  ; prefixes from the RUN lines appear to have no checks for this function
  ; here; confirm against the full upstream test before regenerating.
  %x1 = load <48 x i8>, ptr %p1, align 16
  %x2 = load <48 x i8>, ptr %p2, align 16
  %s1 = shufflevector <48 x i8> %x1, <48 x i8> poison, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %s2 = shufflevector <48 x i8> %x2, <48 x i8> poison, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %r = mul <16 x i8> %s1, %s2
  ret <16 x i8> %r
}