[DAG] visitINSERT_VECTOR_ELT - extend folding to BUILD_VECTOR if all missing elements from an insertion chain are known zero

Simon Pilgrim 2022-08-01 11:32:23 +01:00
parent e8d260753e
commit b43d7aacf8
4 changed files with 33 additions and 18 deletions
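
The fold being extended: visitINSERT_VECTOR_ELT already collapses a chain of INSERT_VECTOR_ELT nodes into a single BUILD_VECTOR when the chain writes every lane. After this commit, lanes the chain never writes no longer block the fold, provided MaskedVectorIsZero proves the chain's source vector is zero in exactly those lanes. A minimal IR sketch of the pattern (function and value names are illustrative, not a test from this commit):

  ; Lanes 2 and 3 are never written by the insert chain, but the
  ; zeroinitializer base makes them known zero, so the chain can now fold
  ; to a single BUILD_VECTOR (%x, %y, 0.0, 0.0) during DAG combine.
  define <4 x float> @chain_over_zero(float %x, float %y) {
    %i0 = insertelement <4 x float> zeroinitializer, float %x, i32 0
    %i1 = insertelement <4 x float> %i0, float %y, i32 1
    ret <4 x float> %i1
  }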

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

@@ -19632,6 +19632,23 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
       // Failed to find a match in the chain - bail.
       break;
     }
+
+    // See if we can fill in the missing constant elements as zeros.
+    // TODO: Should we do this for any constant?
+    APInt DemandedZeroElts = APInt::getZero(NumElts);
+    for (int I = 0; I != NumElts; ++I)
+      if (!Ops[I])
+        DemandedZeroElts.setBit(I);
+
+    if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
+      SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
+                                    : DAG.getConstantFP(0, DL, MaxEltVT);
+      for (int I = 0; I != NumElts; ++I)
+        if (!Ops[I])
+          Ops[I] = Zero;
+
+      return CanonicalizeBuildVector(Ops);
+    }
   }
 
   return SDValue();
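
In the new block above, Ops holds the lanes recovered from the insertion chain, so a null Ops[I] marks a lane the chain never wrote. Those indices are gathered into the DemandedZeroElts mask, and DAG.MaskedVectorIsZero(InVec, DemandedZeroElts) checks whether the source vector is known zero in just those lanes via known-bits analysis, so the zeros need not come from a literal zeroinitializer. A hedged IR sketch of such a case (whether a given pattern folds in practice depends on what known-bits can prove for the node):

  ; %base zeros lanes 2 and 3 through a shuffle with a zero vector; the
  ; chain then writes lanes 0 and 1, so MaskedVectorIsZero only has to
  ; prove lanes 2 and 3 of %base are zero, which the mask guarantees.
  define <4 x i32> @chain_over_shuffled_zeros(<4 x i32> %v, i32 %x, i32 %y) {
    %base = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer,
                          <4 x i32> <i32 0, i32 1, i32 6, i32 7>
    %i0 = insertelement <4 x i32> %base, i32 %x, i32 0
    %i1 = insertelement <4 x i32> %i0, i32 %y, i32 1
    ret <4 x i32> %i1
  }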


@@ -562,8 +562,8 @@ define <8 x float> @PR41512_v8f32(float %x, float %y) {
 ; AVX-LABEL: PR41512_v8f32:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
 ; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX-NEXT: retq
 %ins1 = insertelement <8 x float> zeroinitializer, float %x, i32 0
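
For PR41512_v8f32 the output is unchanged apart from the order of the two vblendps instructions; the chain now reaches the same code through the BUILD_VECTOR path, which emits the lanes in a different order.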


@@ -12,9 +12,7 @@
 define i1 @dont_hit_assert(i24 signext %d) {
 ; CHECK-LABEL: dont_hit_assert:
 ; CHECK: # %bb.0: # %for.cond
-; CHECK-NEXT: movb $-1, %al
-; CHECK-NEXT: negb %al
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: xorl %eax, %eax
 ; CHECK-NEXT: retq
 for.cond:
 %t0 = insertelement <8 x i24> zeroinitializer, i24 1, i32 0
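
The payoff here is complete constant folding: the chain writes i24 1 into lane 0 of a zeroinitializer base, so the extended fold turns the whole vector into a constant BUILD_VECTOR, the dependent scalar compare folds at compile time, and the old movb/negb/sete sequence collapses to a single xorl %eax, %eax.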


@@ -534,8 +534,8 @@ define <16 x float> @test_masked_permps_v16f32(ptr %vp, <16 x float> %vec2) {
 define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
 ; SKX64-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
 ; SKX64: # %bb.0:
-; SKX64-NEXT: vmovdqa 32(%rdi), %xmm0
-; SKX64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SKX64-NEXT: vpbroadcastd 44(%rdi), %xmm0
+; SKX64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; SKX64-NEXT: vmovdqa %ymm0, 672(%rsi)
 ; SKX64-NEXT: vmovdqa 208(%rdi), %xmm0
 ; SKX64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
@@ -545,11 +545,11 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
 ;
 ; KNL64-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
 ; KNL64: # %bb.0:
-; KNL64-NEXT: vmovdqa 32(%rdi), %xmm0
-; KNL64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; KNL64-NEXT: vpbroadcastd 44(%rdi), %xmm0
+; KNL64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; KNL64-NEXT: vmovdqa %ymm0, 672(%rsi)
-; KNL64-NEXT: vmovdqa 208(%rdi), %xmm0
-; KNL64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
+; KNL64-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,0,2,3]
+; KNL64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; KNL64-NEXT: vmovdqa %ymm0, 832(%rsi)
 ; KNL64-NEXT: retq
 ;
@@ -557,8 +557,8 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
 ; SKX32: # %bb.0:
 ; SKX32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; SKX32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; SKX32-NEXT: vmovdqa 32(%ecx), %xmm0
-; SKX32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SKX32-NEXT: vpbroadcastd 44(%ecx), %xmm0
+; SKX32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; SKX32-NEXT: vmovdqa %ymm0, 672(%eax)
 ; SKX32-NEXT: vmovdqa 208(%ecx), %xmm0
 ; SKX32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
@@ -569,13 +569,13 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
 ; KNL32-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
 ; KNL32: # %bb.0:
 ; KNL32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL32-NEXT: vmovdqa 32(%eax), %xmm0
-; KNL32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
 ; KNL32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; KNL32-NEXT: vmovdqa %ymm0, 672(%ecx)
-; KNL32-NEXT: vmovdqa 208(%eax), %xmm0
-; KNL32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
-; KNL32-NEXT: vmovdqa %ymm0, 832(%ecx)
+; KNL32-NEXT: vpbroadcastd 44(%ecx), %xmm0
+; KNL32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; KNL32-NEXT: vmovdqa %ymm0, 672(%eax)
+; KNL32-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,0,2,3]
+; KNL32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; KNL32-NEXT: vmovdqa %ymm0, 832(%eax)
 ; KNL32-NEXT: retl
 %t87 = load <16 x i32>, ptr %src, align 64
 %t88 = extractelement <16 x i32> %t87, i64 11
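
In these pshufb tests the shuffle masks demand only the low eight bytes and zero the upper eight; once the combine proves the unwritten lanes zero, the backend can fold the wide vmovdqa load into a narrower vpbroadcastd or a memory-operand vpshufd and produce the zeroed upper half with vmovq, which clears bits 127:64 implicitly.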