
Make splat/element-equivalent detection easier by ensuring that constant splats contain no undefs. Integer constants are limited to rematerializable zeros/ones values to avoid unnecessary scalar_to_vector(int) -> load conversions; we can relax this later if useful.
83 lines
5.0 KiB
LLVM
83 lines
5.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 -O0 | FileCheck %s --check-prefixes=CHECK-O0
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 -O3 | FileCheck %s --check-prefixes=CHECK-O3
|
|
|
|
; @pluto builds one <16 x i64> value from three constant-mask selects followed
; by a two-input shufflevector, and returns it.
;
; Parameters (all <16 x i64>):
;   %arg, %arg1 - blended into %tmp
;   %arg2       - blended with zero into %tmp5
;   %arg3       - blended with %tmp5 into %tmp6
;   %arg4       - never referenced in the body
;
; Resolving the constant masks lane by lane, the returned vector is:
;   [ arg[11],  arg3[2],  arg3[8],  arg1[9],
;     arg1[14], arg3[13], arg3[13], arg1[6],
;     arg1[14], 0,        arg1[8],  arg1[9],
;     0,        arg1[12], arg3[9],  arg1[6] ]
; The only %tmp5 lanes that reach the result (6 and 12) are zero lanes, so no
; element of %arg2 contributes to the returned value.
;
; The expected-output comments between this line and the entry block are
; autogenerated (see the NOTE at the top of the file) for the two llc
; invocations (-O0 and -O3, both with -mattr=avx2); regenerate them with the
; script instead of editing them by hand.
define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <16 x i64> %arg3, <16 x i64> %arg4) nounwind {
|
|
; CHECK-O0-LABEL: pluto:
|
|
; CHECK-O0: # %bb.0: # %bb
|
|
; CHECK-O0-NEXT: pushq %rbp
|
|
; CHECK-O0-NEXT: movq %rsp, %rbp
|
|
; CHECK-O0-NEXT: andq $-32, %rsp
|
|
; CHECK-O0-NEXT: subq $64, %rsp
|
|
; CHECK-O0-NEXT: vmovaps %ymm4, %ymm10
|
|
; CHECK-O0-NEXT: vmovaps %ymm3, %ymm9
|
|
; CHECK-O0-NEXT: vmovaps %ymm2, (%rsp) # 32-byte Spill
|
|
; CHECK-O0-NEXT: vmovaps %ymm1, %ymm8
|
|
; CHECK-O0-NEXT: vmovaps %ymm0, %ymm3
|
|
; CHECK-O0-NEXT: vmovaps (%rsp), %ymm0 # 32-byte Reload
|
|
; CHECK-O0-NEXT: vmovaps 240(%rbp), %ymm4
|
|
; CHECK-O0-NEXT: vmovaps 208(%rbp), %ymm1
|
|
; CHECK-O0-NEXT: vmovaps 176(%rbp), %ymm2
|
|
; CHECK-O0-NEXT: vmovaps 144(%rbp), %ymm2
|
|
; CHECK-O0-NEXT: vmovaps 112(%rbp), %ymm11
|
|
; CHECK-O0-NEXT: vmovaps 80(%rbp), %ymm11
|
|
; CHECK-O0-NEXT: vmovaps 48(%rbp), %ymm11
|
|
; CHECK-O0-NEXT: vmovaps 16(%rbp), %ymm11
|
|
; CHECK-O0-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3,4,5],ymm0[6,7]
|
|
; CHECK-O0-NEXT: vxorps %xmm3, %xmm3, %xmm3
|
|
; CHECK-O0-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm2[0],ymm1[0],ymm2[2],ymm1[2]
|
|
; CHECK-O0-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
|
|
; CHECK-O0-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,2,1]
|
|
; CHECK-O0-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5],ymm0[6,7]
|
|
; CHECK-O0-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm7[2,3],ymm6[0,1]
|
|
; CHECK-O0-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3],ymm2[4,5,6,7]
|
|
; CHECK-O0-NEXT: vmovaps %xmm1, %xmm3
|
|
; CHECK-O0-NEXT: vmovaps %xmm7, %xmm1
|
|
; CHECK-O0-NEXT: vpblendd {{.*#+}} xmm3 = xmm1[0,1],xmm3[2,3]
|
|
; CHECK-O0-NEXT: # implicit-def: $ymm1
|
|
; CHECK-O0-NEXT: vmovaps %xmm3, %xmm1
|
|
; CHECK-O0-NEXT: vpermq {{.*#+}} ymm3 = ymm1[0,0,1,3]
|
|
; CHECK-O0-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm5[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm5[16,17,18,19,20,21,22,23]
|
|
; CHECK-O0-NEXT: vpblendd {{.*#+}} ymm3 = ymm1[0,1],ymm3[2,3,4,5],ymm1[6,7]
|
|
; CHECK-O0-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm7[0],ymm5[0],ymm7[2],ymm5[2]
|
|
; CHECK-O0-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
|
|
; CHECK-O0-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,1,3]
|
|
; CHECK-O0-NEXT: movq %rbp, %rsp
|
|
; CHECK-O0-NEXT: popq %rbp
|
|
; CHECK-O0-NEXT: retq
|
|
;
|
|
; CHECK-O3-LABEL: pluto:
|
|
; CHECK-O3: # %bb.0: # %bb
|
|
; CHECK-O3-NEXT: pushq %rbp
|
|
; CHECK-O3-NEXT: movq %rsp, %rbp
|
|
; CHECK-O3-NEXT: andq $-32, %rsp
|
|
; CHECK-O3-NEXT: subq $32, %rsp
|
|
; CHECK-O3-NEXT: vmovdqa 208(%rbp), %ymm3
|
|
; CHECK-O3-NEXT: vmovdqa 144(%rbp), %ymm0
|
|
; CHECK-O3-NEXT: vpblendd {{.*#+}} ymm1 = ymm6[0,1,2,3,4,5],ymm2[6,7]
|
|
; CHECK-O3-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
; CHECK-O3-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
|
|
; CHECK-O3-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
; CHECK-O3-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,1,2,1]
|
|
; CHECK-O3-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
|
|
; CHECK-O3-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm7[2,3],ymm6[0,1]
|
|
; CHECK-O3-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1],ymm2[2,3],ymm1[4,5,6,7]
|
|
; CHECK-O3-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm7[0],ymm5[0],ymm7[2],ymm5[2]
|
|
; CHECK-O3-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,2,3]
|
|
; CHECK-O3-NEXT: vpbroadcastq 248(%rbp), %ymm4
|
|
; CHECK-O3-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm4[2,3,4,5],ymm1[6,7]
|
|
; CHECK-O3-NEXT: vpblendd {{.*#+}} xmm3 = xmm7[0,1],xmm3[2,3]
|
|
; CHECK-O3-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,0,1,3]
|
|
; CHECK-O3-NEXT: vpslldq {{.*#+}} ymm4 = zero,zero,zero,zero,zero,zero,zero,zero,ymm5[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm5[16,17,18,19,20,21,22,23]
|
|
; CHECK-O3-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1],ymm3[2,3,4,5],ymm4[6,7]
|
|
; CHECK-O3-NEXT: movq %rbp, %rsp
|
|
; CHECK-O3-NEXT: popq %rbp
|
|
; CHECK-O3-NEXT: retq
|
|
; Single basic block. Its name is echoed in the expected "# %bb" asm comments
; above, so renaming it would require regenerating the assertions.
bb:
|
|
; %tmp: lanes 10 and 11 come from %arg; every other lane comes from %arg1.
%tmp = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg, <16 x i64> %arg1
|
|
; %tmp5: lanes 0, 3, 4, 7 and 9 come from %arg2; every other lane is zero.
%tmp5 = select <16 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg2, <16 x i64> zeroinitializer
|
|
; %tmp6: lanes 1-3, 8, 9 and 13-15 come from %arg3; the remaining lanes
; (0, 4-7, 10-12) fall through to %tmp5.
%tmp6 = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true>, <16 x i64> %arg3, <16 x i64> %tmp5
|
|
; %tmp7: mask indices 0-15 pick lanes of %tmp, indices 16-31 pick lanes of
; %tmp6 (index - 16). Several source lanes are used more than once
; (6, 9, 14 and 29).
%tmp7 = shufflevector <16 x i64> %tmp, <16 x i64> %tmp6, <16 x i32> <i32 11, i32 18, i32 24, i32 9, i32 14, i32 29, i32 29, i32 6, i32 14, i32 28, i32 8, i32 9, i32 22, i32 12, i32 25, i32 6>
|
|
ret <16 x i64> %tmp7
|
|
}
|