diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0ea2212aeeef..4155d7434922 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -12613,11 +12613,13 @@ public: } InstructionCost createFreeze(InstructionCost Cost) { return Cost; } /// Finalize emission of the shuffles. - InstructionCost - finalize(ArrayRef ExtMask, - ArrayRef> SubVectors, - ArrayRef SubVectorsMask, unsigned VF = 0, - function_ref &)> Action = {}) { + InstructionCost finalize( + ArrayRef ExtMask, + ArrayRef> SubVectors, + ArrayRef SubVectorsMask, unsigned VF = 0, + function_ref &, + function_ref)>)> + Action = {}) { IsFinalized = true; if (Action) { const PointerUnion &Vec = InVectors.front(); @@ -12629,7 +12631,10 @@ public: assert(VF > 0 && "Expected vector length for the final value before action."); Value *V = cast(Vec); - Action(V, CommonMask); + Action(V, CommonMask, [this](Value *V1, Value *V2, ArrayRef Mask) { + Cost += createShuffle(V1, V2, Mask); + return V1; + }); InVectors.front() = V; } if (!SubVectors.empty()) { @@ -16593,11 +16598,13 @@ public: /// Finalize emission of the shuffles. /// \param Action the action (if any) to be performed before final applying of /// the \p ExtMask mask. - Value * - finalize(ArrayRef ExtMask, - ArrayRef> SubVectors, - ArrayRef SubVectorsMask, unsigned VF = 0, - function_ref &)> Action = {}) { + Value *finalize( + ArrayRef ExtMask, + ArrayRef> SubVectors, + ArrayRef SubVectorsMask, unsigned VF = 0, + function_ref &, + function_ref)>)> + Action = {}) { IsFinalized = true; if (Action) { Value *Vec = InVectors.front(); @@ -16616,7 +16623,9 @@ public: std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), VecVF), 0); Vec = createShuffle(Vec, nullptr, ResizeMask); } - Action(Vec, CommonMask); + Action(Vec, CommonMask, [this](Value *V1, Value *V2, ArrayRef Mask) { + return createShuffle(V1, V2, Mask); + }); InVectors.front() = Vec; } if (!SubVectors.empty()) { @@ -17278,9 +17287,67 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy, else Res = ShuffleBuilder.finalize( E->ReuseShuffleIndices, SubVectors, SubVectorsMask, E->Scalars.size(), - [&](Value *&Vec, SmallVectorImpl &Mask) { - TryPackScalars(NonConstants, Mask, /*IsRootPoison=*/false); - Vec = ShuffleBuilder.gather(NonConstants, Mask.size(), Vec); + [&](Value *&Vec, SmallVectorImpl &Mask, auto CreateShuffle) { + bool IsSplat = isSplat(NonConstants); + SmallVector BVMask(Mask.size(), PoisonMaskElem); + TryPackScalars(NonConstants, BVMask, /*IsRootPoison=*/false); + auto CheckIfSplatIsProfitable = [&]() { + // Estimate the cost of splatting + shuffle and compare with + // insert + shuffle. + constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + Value *V = *find_if_not(NonConstants, IsaPred); + if (isa(V) || isVectorized(V)) + return false; + InstructionCost SplatCost = TTI->getVectorInstrCost( + Instruction::InsertElement, VecTy, CostKind, /*Index=*/0, + PoisonValue::get(VecTy), V); + SmallVector NewMask(Mask.begin(), Mask.end()); + for (auto [Idx, I] : enumerate(BVMask)) + if (I != PoisonMaskElem) + NewMask[Idx] = Mask.size(); + SplatCost += ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, VecTy, + NewMask, CostKind); + InstructionCost BVCost = TTI->getVectorInstrCost( + Instruction::InsertElement, VecTy, CostKind, + *find_if(Mask, [](int I) { return I != PoisonMaskElem; }), + Vec, V); + // Shuffle required? + if (count(BVMask, PoisonMaskElem) < + static_cast(BVMask.size() - 1)) { + SmallVector NewMask(Mask.begin(), Mask.end()); + for (auto [Idx, I] : enumerate(BVMask)) + if (I != PoisonMaskElem) + NewMask[Idx] = I; + BVCost += ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, + VecTy, NewMask, CostKind); + } + return SplatCost <= BVCost; + }; + if (!IsSplat || Mask.size() <= 2 || !CheckIfSplatIsProfitable()) { + for (auto [Idx, I] : enumerate(BVMask)) + if (I != PoisonMaskElem) + Mask[Idx] = I; + Vec = ShuffleBuilder.gather(NonConstants, Mask.size(), Vec); + } else { + Value *V = *find_if_not(NonConstants, IsaPred); + SmallVector Values(NonConstants.size(), + PoisonValue::get(ScalarTy)); + Values[0] = V; + Value *BV = ShuffleBuilder.gather(Values, BVMask.size()); + SmallVector SplatMask(BVMask.size(), PoisonMaskElem); + transform(BVMask, SplatMask.begin(), [](int I) { + return I == PoisonMaskElem ? PoisonMaskElem : 0; + }); + if (!ShuffleVectorInst::isIdentityMask(SplatMask, VF)) + BV = CreateShuffle(BV, nullptr, SplatMask); + for (auto [Idx, I] : enumerate(BVMask)) + if (I != PoisonMaskElem) + Mask[Idx] = BVMask.size() + Idx; + Vec = CreateShuffle(Vec, BV, Mask); + for (auto [Idx, I] : enumerate(Mask)) + if (I != PoisonMaskElem) + Mask[Idx] = Idx; + } }); } else if (!allConstant(GatheredScalars)) { // Gather unique scalars and all constants. diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll index fcd3bfc3f323..295a71899c33 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll @@ -38,7 +38,8 @@ define void @test() { ; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]] ; CHECK: [[BB77]]: ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> [[TMP12]], float [[I70]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 0 +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP17]], <8 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 1 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP14]], float [[I68]], i32 2 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP19]], float [[I66]], i32 3 @@ -48,7 +49,7 @@ define void @test() { ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP25]], <16 x i32> ; CHECK-NEXT: br label %[[BB78:.*]] ; CHECK: [[BB78]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP17]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP23]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ] ; CHECK-NEXT: [[TMP22:%.*]] = phi <8 x float> [ [[TMP21]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ] ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x float> [[TMP22]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll index 27f3155b50db..acfd4581f98f 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -255,7 +255,9 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) { ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_12]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP0]], <8 x i8> [[TMP4]], <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[L_11]], i32 11 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> poison, i8 [[L_11]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP11]], <16 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12) ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], splat (i8 -1) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll index 02327272f3ab..992909fb3e87 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll @@ -12,7 +12,9 @@ define i32 @test() { ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <12 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[G_228_PROMOTED166_I1105_I]], i32 7 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[G_228_PROMOTED166_I1105_I]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP9]], <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v12i32(<16 x i32> poison, <12 x i32> [[TMP3]], i64 0) ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP8]], <16 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP11]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll index 878b2370bfd2..2a54ae9a1e74 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll @@ -9,6 +9,8 @@ define i32 @test(i64 %l.549) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[CONV3]], i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 0, i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> poison, i64 [[L_549]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: br label %[[IF_THEN19:.*]] ; CHECK: [[P:.*]]: ; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ zeroinitializer, %[[IF_END29:.*]] ], [ [[TMP13:%.*]], %[[IF_END25:.*]] ] @@ -23,20 +25,20 @@ define i32 @test(i64 %l.549) { ; CHECK: [[LOR_LHS_FALSE]]: ; CHECK-NEXT: br i1 false, label %[[LAND_LHS_TRUE]], label %[[S]] ; CHECK: [[R]]: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i64> [ [[TMP7]], %[[Q]] ], [ [[TMP16:%.*]], %[[IF_THEN19]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i64> [ [[TMP7]], %[[Q]] ], [ [[TMP16:%.*]], %[[IF_THEN19]] ] ; CHECK-NEXT: br i1 false, label %[[S]], label %[[LAND_LHS_TRUE]] ; CHECK: [[LAND_LHS_TRUE]]: -; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ [[TMP8]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i64> [ [[TMP18]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ] ; CHECK-NEXT: br i1 false, label %[[Q]], label %[[S]] ; CHECK: [[S]]: -; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP9]], %[[LAND_LHS_TRUE]] ], [ [[TMP8]], %[[R]] ], [ [[TMP7]], %[[LOR_LHS_FALSE]] ], [ [[TMP17]], %[[P]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP19]], %[[LAND_LHS_TRUE]] ], [ [[TMP18]], %[[R]] ], [ [[TMP7]], %[[LOR_LHS_FALSE]] ], [ [[TMP17]], %[[P]] ] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> ; CHECK-NEXT: br label %[[IF_THEN19]] ; CHECK: [[IF_THEN19]]: ; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i64> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP11]], %[[S]] ] ; CHECK-NEXT: [[TMP13]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[L_549]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP14]], <4 x i64> [[TMP9]], <4 x i32> ; CHECK-NEXT: [[TMP16]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP15]], <2 x i64> [[TMP2]], i64 2) ; CHECK-NEXT: br i1 false, label %[[R]], label %[[IF_END25]] ; CHECK: [[IF_END25]]: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll index e93e741c9baa..289c6002851d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll @@ -30,16 +30,18 @@ define i32 @test(i32 %s.0) { ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> , <8 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> [[TMP14]], i32 [[J_4]], i32 7 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> poison, i32 [[J_4]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i32> [[TMP15]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <8 x i32> [[TMP14]], <8 x i32> [[TMP16]], <8 x i32> ; CHECK-NEXT: br label %[[IF_END24]] ; CHECK: [[IF_THEN18:.*]]: ; CHECK-NEXT: br label %[[T]] ; CHECK: [[T]]: -; CHECK-NEXT: [[TMP16:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ] +; CHECK-NEXT: [[TMP30:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ] ; CHECK-NEXT: [[TMP17]] = extractelement <4 x i32> [[TMP23:%.*]], i32 0 ; CHECK-NEXT: br i1 false, label %[[IF_END24]], label %[[K]] ; CHECK: [[IF_END24]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP15]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP16]], %[[T]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP30]], %[[T]] ] ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll index d474218e84cc..281019e8befa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll @@ -124,15 +124,15 @@ define double @preserve_loop_info(ptr %arg, i1 %arg2) { ; CHECK: outer.header: ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: -; CHECK-NEXT: br i1 %arg2, label [[OUTER_LATCH:%.*]], label [[INNER]] +; CHECK-NEXT: br i1 [[ARG2:%.*]], label [[OUTER_LATCH:%.*]], label [[INNER]] ; CHECK: outer.latch: -; CHECK-NEXT: br i1 %arg2, label [[BB:%.*]], label [[OUTER_HEADER]] +; CHECK-NEXT: br i1 [[ARG2]], label [[BB:%.*]], label [[OUTER_HEADER]] ; CHECK: bb: ; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr undef, align 8 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x double], ptr [[TMP]], i64 0, i64 1 ; CHECK-NEXT: br label [[LOOP_3HEADER:%.*]] ; CHECK: loop.3header: -; CHECK-NEXT: br i1 %arg2, label [[LOOP_3LATCH:%.*]], label [[BB9:%.*]] +; CHECK-NEXT: br i1 [[ARG2]], label [[LOOP_3LATCH:%.*]], label [[BB9:%.*]] ; CHECK: bb9: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x double], ptr [[TMP5]], i64 undef, i64 1 ; CHECK-NEXT: store double undef, ptr [[TMP]], align 16 @@ -140,7 +140,7 @@ define double @preserve_loop_info(ptr %arg, i1 %arg2) { ; CHECK-NEXT: store double [[TMP12]], ptr [[TMP7]], align 8 ; CHECK-NEXT: br label [[LOOP_3LATCH]] ; CHECK: loop.3latch: -; CHECK-NEXT: br i1 %arg2, label [[BB14:%.*]], label [[LOOP_3HEADER]] +; CHECK-NEXT: br i1 [[ARG2]], label [[BB14:%.*]], label [[LOOP_3HEADER]] ; CHECK: bb14: ; CHECK-NEXT: [[TMP15:%.*]] = call double undef(ptr [[TMP]], ptr [[ARG:%.*]]) ; CHECK-NEXT: ret double undef @@ -189,7 +189,8 @@ define void @gather_sequence_crash(<2 x float> %arg, ptr %arg1, float %arg2, ptr ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[ARG1:%.*]], i32 3 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[ARG:%.*]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[ARG2:%.*]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> poison, float [[ARG2:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], zeroinitializer ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP8]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll index 230e165e43ed..ff1395a45084 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll @@ -6,7 +6,9 @@ define i32 @foo() { ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[D:%.*]] = load i32, ptr null, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> , i32 [[D]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[D]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]] ; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll index 7fe6941d52da..2612a21b9eed 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll @@ -11,7 +11,8 @@ define i32 @test(i1 %cond) { ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], %[[BB]] ], [ zeroinitializer, %[[ENTRY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[P1]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[P1]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> zeroinitializer, [[TMP4]] ; CHECK-NEXT: [[OR92]] = or i32 1, 0 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]]) diff --git a/llvm/test/Transforms/SLPVectorizer/icmp-altopcode-after-reordering.ll b/llvm/test/Transforms/SLPVectorizer/icmp-altopcode-after-reordering.ll index 61e3c6cdb886..002b9a70255d 100644 --- a/llvm/test/Transforms/SLPVectorizer/icmp-altopcode-after-reordering.ll +++ b/llvm/test/Transforms/SLPVectorizer/icmp-altopcode-after-reordering.ll @@ -9,12 +9,13 @@ define i32 @test(ptr %sptr, i64 %0) { ; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP0]] to i32 ; CHECK-NEXT: [[IV2:%.*]] = getelementptr i8, ptr [[SPTR]], i64 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[IV2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[CONV_I]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[CONV_I]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp sle <4 x i32> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp sle <4 x i32> [[TMP2]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> [[TMP12]], <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]] diff --git a/llvm/test/Transforms/SLPVectorizer/reordering-single-phi.ll b/llvm/test/Transforms/SLPVectorizer/reordering-single-phi.ll index a70daf9cf8d6..cf7faf8448c1 100644 --- a/llvm/test/Transforms/SLPVectorizer/reordering-single-phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/reordering-single-phi.ll @@ -19,7 +19,8 @@ define void @test() { ; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX31]], align 4 ; CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP14]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP7]], <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x float> [[TMP11]], [[TMP14]] ; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[ARRAYIDX6]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5