[DAG] visitINSERT_VECTOR_ELT - extend folding to BUILD_VECTOR if all missing elements from an insertion chain are known zero
This commit is contained in:
parent
e8d260753e
commit
b43d7aacf8
@@ -19632,6 +19632,23 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
|
||||
// Failed to find a match in the chain - bail.
|
||||
break;
|
||||
}
|
||||
|
||||
// See if we can fill in the missing constant elements as zeros.
|
||||
// TODO: Should we do this for any constant?
|
||||
APInt DemandedZeroElts = APInt::getZero(NumElts);
|
||||
for (int I = 0; I != NumElts; ++I)
|
||||
if (!Ops[I])
|
||||
DemandedZeroElts.setBit(I);
|
||||
|
||||
if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
|
||||
SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
|
||||
: DAG.getConstantFP(0, DL, MaxEltVT);
|
||||
for (int I = 0; I != NumElts; ++I)
|
||||
if (!Ops[I])
|
||||
Ops[I] = Zero;
|
||||
|
||||
return CanonicalizeBuildVector(Ops);
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
@@ -562,8 +562,8 @@ define <8 x float> @PR41512_v8f32(float %x, float %y) {
|
||||
; AVX-LABEL: PR41512_v8f32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
|
||||
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
|
||||
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
|
||||
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX-NEXT: retq
|
||||
%ins1 = insertelement <8 x float> zeroinitializer, float %x, i32 0
|
||||
|
@@ -12,9 +12,7 @@
|
||||
define i1 @dont_hit_assert(i24 signext %d) {
|
||||
; CHECK-LABEL: dont_hit_assert:
|
||||
; CHECK: # %bb.0: # %for.cond
|
||||
; CHECK-NEXT: movb $-1, %al
|
||||
; CHECK-NEXT: negb %al
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: retq
|
||||
for.cond:
|
||||
%t0 = insertelement <8 x i24> zeroinitializer, i24 1, i32 0
|
||||
|
@@ -534,8 +534,8 @@ define <16 x float> @test_masked_permps_v16f32(ptr %vp, <16 x float> %vec2) {
|
||||
define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
|
||||
; SKX64-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
|
||||
; SKX64: # %bb.0:
|
||||
; SKX64-NEXT: vmovdqa 32(%rdi), %xmm0
|
||||
; SKX64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SKX64-NEXT: vpbroadcastd 44(%rdi), %xmm0
|
||||
; SKX64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; SKX64-NEXT: vmovdqa %ymm0, 672(%rsi)
|
||||
; SKX64-NEXT: vmovdqa 208(%rdi), %xmm0
|
||||
; SKX64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
@@ -545,11 +545,11 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
|
||||
;
|
||||
; KNL64-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
|
||||
; KNL64: # %bb.0:
|
||||
; KNL64-NEXT: vmovdqa 32(%rdi), %xmm0
|
||||
; KNL64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; KNL64-NEXT: vpbroadcastd 44(%rdi), %xmm0
|
||||
; KNL64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; KNL64-NEXT: vmovdqa %ymm0, 672(%rsi)
|
||||
; KNL64-NEXT: vmovdqa 208(%rdi), %xmm0
|
||||
; KNL64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; KNL64-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,0,2,3]
|
||||
; KNL64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; KNL64-NEXT: vmovdqa %ymm0, 832(%rsi)
|
||||
; KNL64-NEXT: retq
|
||||
;
|
||||
@@ -557,8 +557,8 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
|
||||
; SKX32: # %bb.0:
|
||||
; SKX32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; SKX32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; SKX32-NEXT: vmovdqa 32(%ecx), %xmm0
|
||||
; SKX32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SKX32-NEXT: vpbroadcastd 44(%ecx), %xmm0
|
||||
; SKX32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; SKX32-NEXT: vmovdqa %ymm0, 672(%eax)
|
||||
; SKX32-NEXT: vmovdqa 208(%ecx), %xmm0
|
||||
; SKX32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
@@ -569,13 +569,13 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
|
||||
; KNL32-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
|
||||
; KNL32: # %bb.0:
|
||||
; KNL32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL32-NEXT: vmovdqa 32(%eax), %xmm0
|
||||
; KNL32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; KNL32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; KNL32-NEXT: vmovdqa %ymm0, 672(%ecx)
|
||||
; KNL32-NEXT: vmovdqa 208(%eax), %xmm0
|
||||
; KNL32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; KNL32-NEXT: vmovdqa %ymm0, 832(%ecx)
|
||||
; KNL32-NEXT: vpbroadcastd 44(%ecx), %xmm0
|
||||
; KNL32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; KNL32-NEXT: vmovdqa %ymm0, 672(%eax)
|
||||
; KNL32-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,0,2,3]
|
||||
; KNL32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; KNL32-NEXT: vmovdqa %ymm0, 832(%eax)
|
||||
; KNL32-NEXT: retl
|
||||
%t87 = load <16 x i32>, ptr %src, align 64
|
||||
%t88 = extractelement <16 x i32> %t87, i64 11
|
||||
|
Loading…
x
Reference in New Issue
Block a user