[X86] narrowShuffle - only narrow from legal vector types

Fixes #62653
This commit is contained in:
Simon Pilgrim 2023-05-12 16:41:12 +01:00
parent 2bd6077d7f
commit c06a61f78e
3 changed files with 132 additions and 5 deletions

View File

@@ -42788,9 +42788,9 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
 /// low half of each source vector and does not set any high half elements in
 /// the destination vector, narrow the shuffle to half its original size.
 static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
-  if (!Shuf->getValueType(0).isSimple())
+  EVT VT = Shuf->getValueType(0);
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(Shuf->getValueType(0)))
     return SDValue();
-  MVT VT = Shuf->getSimpleValueType(0);
   if (!VT.is256BitVector() && !VT.is512BitVector())
     return SDValue();
@@ -42814,7 +42814,7 @@ static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
   // the wide shuffle that we started with.
   return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
                                Shuf->getOperand(1), HalfMask, HalfIdx1,
-                               HalfIdx2, false, DAG, /*UseConcat*/true);
+                               HalfIdx2, false, DAG, /*UseConcat*/ true);
 }
 static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,

View File

@@ -1840,8 +1840,7 @@ define i32 @partial_reduction_sub_v16i32(<16 x i32> %x) {
 ; AVX2-FAST:       # %bb.0:
 ; AVX2-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; AVX2-FAST-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; AVX2-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; AVX2-FAST-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT:    vphsubd %xmm0, %xmm0, %xmm0
 ; AVX2-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX2-FAST-NEXT:    vzeroupper
 ; AVX2-FAST-NEXT:    retq

View File

@@ -0,0 +1,128 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
define <64 x i4> @pr62653(<64 x i4> %a0) nounwind {
; CHECK-LABEL: pr62653:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $r9d killed $r9d def $r9
; CHECK-NEXT: # kill: def $r8d killed $r8d def $r8
; CHECK-NEXT: # kill: def $ecx killed $ecx def $rcx
; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-NEXT: andl $15, %r10d
; CHECK-NEXT: shlq $4, %r10
; CHECK-NEXT: orq %rdi, %r10
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: shlq $8, %rdi
; CHECK-NEXT: orq %r10, %rdi
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-NEXT: andl $15, %r10d
; CHECK-NEXT: shlq $12, %r10
; CHECK-NEXT: orq %rdi, %r10
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-NEXT: andl $15, %r11d
; CHECK-NEXT: shlq $16, %r11
; CHECK-NEXT: orq %r10, %r11
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: shlq $20, %rdi
; CHECK-NEXT: orq %r11, %rdi
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-NEXT: andl $15, %r10d
; CHECK-NEXT: shlq $24, %r10
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-NEXT: andl $15, %r11d
; CHECK-NEXT: shlq $28, %r11
; CHECK-NEXT: orq %r10, %r11
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-NEXT: andl $15, %r10d
; CHECK-NEXT: shlq $32, %r10
; CHECK-NEXT: orq %r11, %r10
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-NEXT: andl $15, %r11d
; CHECK-NEXT: shlq $36, %r11
; CHECK-NEXT: orq %r10, %r11
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-NEXT: andl $15, %r10d
; CHECK-NEXT: shlq $40, %r10
; CHECK-NEXT: orq %r11, %r10
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-NEXT: andl $15, %r11d
; CHECK-NEXT: shlq $44, %r11
; CHECK-NEXT: orq %r10, %r11
; CHECK-NEXT: orq %rdi, %r11
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: shlq $48, %rdi
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-NEXT: andl $15, %r10d
; CHECK-NEXT: shlq $52, %r10
; CHECK-NEXT: orq %rdi, %r10
; CHECK-NEXT: orq %r11, %r10
; CHECK-NEXT: movq %r10, 8(%rax)
; CHECK-NEXT: andl $15, %esi
; CHECK-NEXT: andl $15, %edx
; CHECK-NEXT: shlq $4, %rdx
; CHECK-NEXT: orq %rsi, %rdx
; CHECK-NEXT: andl $15, %ecx
; CHECK-NEXT: shlq $8, %rcx
; CHECK-NEXT: orq %rdx, %rcx
; CHECK-NEXT: andl $15, %r8d
; CHECK-NEXT: shlq $12, %r8
; CHECK-NEXT: orq %rcx, %r8
; CHECK-NEXT: andl $15, %r9d
; CHECK-NEXT: shlq $16, %r9
; CHECK-NEXT: orq %r8, %r9
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT: andl $15, %ecx
; CHECK-NEXT: shlq $20, %rcx
; CHECK-NEXT: orq %r9, %rcx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: andl $15, %esi
; CHECK-NEXT: shlq $24, %rsi
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: andl $15, %edx
; CHECK-NEXT: shlq $28, %rdx
; CHECK-NEXT: orq %rsi, %rdx
; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT: andl $15, %ecx
; CHECK-NEXT: shlq $32, %rcx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: andl $15, %esi
; CHECK-NEXT: shlq $36, %rsi
; CHECK-NEXT: orq %rcx, %rsi
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT: andl $15, %ecx
; CHECK-NEXT: shlq $40, %rcx
; CHECK-NEXT: orq %rsi, %rcx
; CHECK-NEXT: orq %rdx, %rcx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: andl $15, %edx
; CHECK-NEXT: shlq $44, %rdx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: andl $15, %esi
; CHECK-NEXT: shlq $48, %rsi
; CHECK-NEXT: orq %rdx, %rsi
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: andl $15, %edx
; CHECK-NEXT: shlq $52, %rdx
; CHECK-NEXT: orq %rsi, %rdx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: andl $15, %esi
; CHECK-NEXT: shlq $56, %rsi
; CHECK-NEXT: orq %rdx, %rsi
; CHECK-NEXT: orq %rcx, %rsi
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT: shlq $60, %rcx
; CHECK-NEXT: orq %rsi, %rcx
; CHECK-NEXT: movq %rcx, (%rax)
; CHECK-NEXT: retq
%res = shufflevector <64 x i4> %a0, <64 x i4> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 64, i32 65, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
ret <64 x i4> %res
}