diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 591ee2fea314..2e33a6a5d330 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -129,11 +129,6 @@ static cl::opt cl::desc("Only vectorize if you gain more than this " "number ")); -static cl::opt SLPSkipEarlyProfitabilityCheck( - "slp-skip-early-profitability-check", cl::init(false), cl::Hidden, - cl::desc("When true, SLP vectorizer bypasses profitability checks based on " - "heuristics and makes vectorization decision via cost modeling.")); - static cl::opt ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions")); @@ -4608,15 +4603,6 @@ private: return nullptr; } - /// Check that the operand node of alternate node does not generate - /// buildvector sequence. If it is, then probably not worth it to build - /// alternate shuffle, if number of buildvector operands + alternate - /// instruction > than the number of buildvector instructions. - /// \param S the instructions state of the analyzed values. - /// \param VL list of the instructions with alternate opcodes. - bool areAltOperandsProfitable(const InstructionsState &S, - ArrayRef VL) const; - /// Contains all the outputs of legality analysis for a list of values to /// vectorize. class ScalarsVectorizationLegality { @@ -10244,120 +10230,6 @@ static std::pair generateKeySubkey( static bool isMainInstruction(Instruction *I, Instruction *MainOp, Instruction *AltOp, const TargetLibraryInfo &TLI); -bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S, - ArrayRef VL) const { - Type *ScalarTy = S.getMainOp()->getType(); - unsigned Opcode0 = S.getOpcode(); - unsigned Opcode1 = S.getAltOpcode(); - SmallBitVector OpcodeMask(getAltInstrMask(VL, ScalarTy, Opcode0, Opcode1)); - // If this pattern is supported by the target then consider it profitable. - if (TTI->isLegalAltInstr(getWidenedType(ScalarTy, VL.size()), Opcode0, - Opcode1, OpcodeMask)) - return true; - SmallVector Operands; - for (unsigned I : seq(S.getMainOp()->getNumOperands())) { - Operands.emplace_back(); - // Prepare the operand vector. - for (Value *V : VL) { - if (isa(V)) { - Operands.back().push_back( - PoisonValue::get(S.getMainOp()->getOperand(I)->getType())); - continue; - } - Operands.back().push_back(cast(V)->getOperand(I)); - } - } - if (Operands.size() == 2) { - // Try find best operands candidates. - for (unsigned I : seq(0, VL.size() - 1)) { - SmallVector> Candidates(3); - Candidates[0] = std::make_pair(Operands[0][I], Operands[0][I + 1]); - Candidates[1] = std::make_pair(Operands[0][I], Operands[1][I + 1]); - Candidates[2] = std::make_pair(Operands[1][I], Operands[0][I + 1]); - std::optional Res = findBestRootPair(Candidates); - switch (Res.value_or(0)) { - case 0: - break; - case 1: - std::swap(Operands[0][I + 1], Operands[1][I + 1]); - break; - case 2: - std::swap(Operands[0][I], Operands[1][I]); - break; - default: - llvm_unreachable("Unexpected index."); - } - } - } - DenseSet UniqueOpcodes; - constexpr unsigned NumAltInsts = 3; // main + alt + shuffle. - unsigned NonInstCnt = 0; - // Estimate number of instructions, required for the vectorized node and for - // the buildvector node. - unsigned UndefCnt = 0; - // Count the number of extra shuffles, required for vector nodes. - unsigned ExtraShuffleInsts = 0; - // Check that operands do not contain same values and create either perfect - // diamond match or shuffled match. - if (Operands.size() == 2) { - // Do not count same operands twice. - if (Operands.front() == Operands.back()) { - Operands.erase(Operands.begin()); - } else if (!allConstant(Operands.front()) && - all_of(Operands.front(), [&](Value *V) { - return is_contained(Operands.back(), V); - })) { - Operands.erase(Operands.begin()); - ++ExtraShuffleInsts; - } - } - const Loop *L = LI->getLoopFor(S.getMainOp()->getParent()); - // Vectorize node, if: - // 1. at least single operand is constant or splat. - // 2. Operands have many loop invariants (the instructions are not loop - // invariants). - // 3. At least single unique operands is supposed to vectorized. - return none_of(Operands, - [&](ArrayRef Op) { - if (allConstant(Op) || - (!isSplat(Op) && allSameBlock(Op) && allSameType(Op) && - getSameOpcode(Op, *TLI))) - return false; - DenseMap Uniques; - for (Value *V : Op) { - if (isa(V) || - isVectorized(V) || (L && L->isLoopInvariant(V))) { - if (isa(V)) - ++UndefCnt; - continue; - } - auto Res = Uniques.try_emplace(V, 0); - // Found first duplicate - need to add shuffle. - if (!Res.second && Res.first->second == 1) - ++ExtraShuffleInsts; - ++Res.first->getSecond(); - if (auto *I = dyn_cast(V)) - UniqueOpcodes.insert(I->getOpcode()); - else if (Res.second) - ++NonInstCnt; - } - return none_of(Uniques, [&](const auto &P) { - return P.first->hasNUsesOrMore(P.second + 1) && - none_of(P.first->users(), [&](User *U) { - return isVectorized(U) || Uniques.contains(U); - }); - }); - }) || - // Do not vectorize node, if estimated number of vector instructions is - // more than estimated number of buildvector instructions. Number of - // vector operands is number of vector instructions + number of vector - // instructions for operands (buildvectors). Number of buildvector - // instructions is just number_of_operands * number_of_scalars. - (UndefCnt < (VL.size() - 1) * S.getMainOp()->getNumOperands() && - (UniqueOpcodes.size() + NonInstCnt + ExtraShuffleInsts + - NumAltInsts) < S.getMainOp()->getNumOperands() * VL.size()); -} - /// Builds the arguments types vector for the given call instruction with the /// given \p ID for the specified vector factor. static SmallVector @@ -10827,13 +10699,6 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return TreeEntry::NeedToGather; } - if (!SLPSkipEarlyProfitabilityCheck && !areAltOperandsProfitable(S, VL)) { - LLVM_DEBUG( - dbgs() - << "SLP: ShuffleVector not vectorized, operands are buildvector and " - "the whole alt sequence is not profitable.\n"); - return TreeEntry::NeedToGather; - } return TreeEntry::Vectorize; } @@ -17274,6 +17139,8 @@ InstructionCost BoUpSLP::calculateTreeCostAndTrimNonProfitable( auto It = MinBWs.find(TE); if (It != MinBWs.end()) ScalarTy = IntegerType::get(ScalarTy->getContext(), It->second.first); + if (isa(TE->Scalars.front())) + ScalarTy = TE->Scalars.front()->getType(); auto *VecTy = getWidenedType(ScalarTy, Sz); const unsigned EntryVF = TE->getVectorFactor(); auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF); @@ -17302,7 +17169,8 @@ InstructionCost BoUpSLP::calculateTreeCostAndTrimNonProfitable( ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, FinalVecTy, Mask); // If all scalars are reused in gather node(s) or other vector nodes, there // might be extra cost for inserting them. - if (all_of(TE->Scalars, [&](Value *V) { + if ((!TE->hasState() || !TE->isAltShuffle()) && + all_of(TE->Scalars, [&](Value *V) { return (TE->hasCopyableElements() && TE->isCopyableElement(V)) || isConstant(V) || isGathered(V) || getTreeEntries(V).size() > 1; })) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll index 38705032ce1c..77a1c812c52a 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll @@ -34,6 +34,13 @@ define void @test(i64 %0, i64 %1, i64 %2, i64 %3, i64 %.sroa.3341.0.copyload, i6 ; CHECK-NEXT: [[TMP80:%.*]] = insertelement <64 x i64> , i64 [[TMP1]], i32 11 ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> poison, <28 x i32> ; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <14 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP85:%.*]] = shufflevector <2 x i64> [[TMP45]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <2 x i64> [[TMP85]], <2 x i64> , <2 x i32> +; CHECK-NEXT: [[TMP87:%.*]] = mul <2 x i64> [[TMP85]], [[TMP86]] +; CHECK-NEXT: [[TMP88:%.*]] = or <2 x i64> [[TMP85]], [[TMP86]] +; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <2 x i64> [[TMP87]], <2 x i64> [[TMP88]], <2 x i32> +; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <2 x i64> [[TMP89]], <2 x i64> poison, <64 x i32> ; CHECK-NEXT: br label %[[DOTLR_PH1977_US:.*]] ; CHECK: [[_LR_PH1977_US:.*:]] ; CHECK-NEXT: [[INDVAR37888:%.*]] = phi i64 [ 0, [[DOTLR_PH_PREHEADER:%.*]] ], [ 1, %[[DOTLR_PH1977_US]] ] @@ -41,15 +48,12 @@ define void @test(i64 %0, i64 %1, i64 %2, i64 %3, i64 %.sroa.3341.0.copyload, i6 ; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i64> [[TMP34]], <4 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP36:%.*]] = mul <4 x i64> [[TMP20]], [[TMP31]] ; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP0]], [[TMP0]] -; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP40:%.*]] = or <2 x i64> [[TMP42]], splat (i64 1) ; CHECK-NEXT: [[TMP41:%.*]] = shl <2 x i64> [[TMP42]], splat (i64 1) ; CHECK-NEXT: [[TMP39:%.*]] = mul i64 [[TMP0]], [[TMP0]] ; CHECK-NEXT: [[TMP43:%.*]] = add <8 x i64> [[TMP35]], [[TMP25]] ; CHECK-NEXT: [[TMP44:%.*]] = insertelement <64 x i64> [[TMP80]], i64 [[INDVAR37888]], i32 1 -; CHECK-NEXT: [[TMP45:%.*]] = insertelement <64 x i64> [[TMP44]], i64 [[TMP27]], i32 2 -; CHECK-NEXT: [[TMP46:%.*]] = insertelement <64 x i64> [[TMP45]], i64 [[TMP30]], i32 3 +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <64 x i64> [[TMP44]], <64 x i64> [[TMP90]], <64 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <8 x i64> [[TMP35]], <8 x i64> poison, <64 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <4 x i64> [[TMP34]], <4 x i64> poison, <64 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <64 x i64> [[TMP46]], <64 x i64> [[TMP48]], <64 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll index fc805b226d3b..06c4bc205adf 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll @@ -59,12 +59,20 @@ define i32 @test1(ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[D_0:%.*]] = load i16, ptr [[P]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> , i16 [[D_0]], i32 0 +; CHECK-NEXT: [[SZERO_2:%.*]] = sext i16 -1 to i32 +; CHECK-NEXT: [[UZERO_1:%.*]] = zext i16 -1 to i32 +; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i16> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[UZERO_1]], i32 2 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[SZERO_2]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP13]], [[TMP12]] ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP5]], <4 x i16> [[TMP10]], <4 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[TMP11]] to <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) ; CHECK-NEXT: ret i32 [[TMP8]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll index f2ea2df7cc98..78fc3a60f051 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll @@ -1,31 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s -; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu\ -; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED define void @test(i16 %0) { -; FORCED-LABEL: @test( -; FORCED-NEXT: for.body92.preheader: -; FORCED-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> , i16 [[TMP0:%.*]], i32 1 -; FORCED-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32> -; FORCED-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> -; FORCED-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> -; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> -; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP5]], <4 x i32> -; FORCED-NEXT: br label [[FOR_BODY92:%.*]] -; FORCED: for.body92: -; FORCED-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]] -; FORCED-NEXT: store <4 x i32> [[TMP7]], ptr undef, align 8 -; FORCED-NEXT: br label [[FOR_BODY92]] -; ; CHECK-LABEL: @test( ; CHECK-NEXT: for.body92.preheader: +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> , i16 [[TMP0:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP5]], <4 x i32> ; CHECK-NEXT: br label [[FOR_BODY92:%.*]] ; CHECK: for.body92: -; CHECK-NEXT: [[CONV177_I:%.*]] = sext i16 0 to i32 -; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[CONV177_I]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]] ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr undef, align 8 ; CHECK-NEXT: br label [[FOR_BODY92]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll index 5a9ea0d292fa..7ce45b872fae 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll @@ -1,7 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999 < %s | FileCheck %s -; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999\ -; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED define i64 @foo() { ; CHECK-LABEL: define i64 @foo() { @@ -23,25 +21,6 @@ define i64 @foo() { ; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[OR]], 0 ; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]] ; -; FORCED-LABEL: define i64 @foo() { -; FORCED-NEXT: bb: -; FORCED-NEXT: [[ADD7:%.*]] = add i64 0, 0 -; FORCED-NEXT: br label [[BB3:%.*]] -; FORCED: bb1: -; FORCED-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ] -; FORCED-NEXT: ret i64 0 -; FORCED: bb3: -; FORCED-NEXT: [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ] -; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ] -; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> , <2 x i32> -; FORCED-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> , i64 [[PHI5]], i32 0 -; FORCED-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]] -; FORCED-NEXT: [[TMP5]] = add <2 x i64> [[TMP1]], [[TMP2]] -; FORCED-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]] -; FORCED-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 -; FORCED-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP8]], 0 -; FORCED-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]] -; bb: br label %bb3 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll index c5442b7fb7f1..6e656ba942a6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll @@ -1,7 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s -; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu\ -; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED define void @foo() { ; CHECK-LABEL: define void @foo() { @@ -21,23 +19,6 @@ define void @foo() { ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ] ; CHECK-NEXT: ret void ; -; FORCED-LABEL: define void @foo() { -; FORCED-NEXT: bb: -; FORCED-NEXT: br label [[BB1:%.*]] -; FORCED: bb1: -; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ] -; FORCED-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], zeroinitializer -; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> -; FORCED-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer -; FORCED-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 -; FORCED-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]]) -; FORCED-NEXT: br label [[BB4]] -; FORCED: bb4: -; FORCED-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]] -; FORCED: bb5: -; FORCED-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ] -; FORCED-NEXT: ret void -; bb: br label %bb1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll index 93258f2975f3..098a2cd02cae 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll @@ -160,9 +160,9 @@ define void @subadd_and_external_users(ptr %A, ptr %ptr) { ; CHECK-NEXT: [[LD:%.*]] = load double, ptr undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP6]], <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], ; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[A:%.*]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll b/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll index 6fea312b99b2..8f29f3f8de46 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll @@ -7,10 +7,13 @@ define i32 @test(i32 %0, i32 %1) { ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[DOTNEG_NEG:%.*]] = shl i32 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> , i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = add <2 x i32> [[TMP3]], [[TMP12]] +; CHECK-NEXT: [[TMP21:%.*]] = shl <2 x i32> [[TMP3]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP20]], <2 x i32> [[TMP21]], <2 x i32> +; CHECK-NEXT: [[DOTNEG_NEG:%.*]] = shl i32 [[TMP0]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @st, i64 12), align 4 ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr getelementptr inbounds nuw (i8, ptr @st, i64 8), align 8 @@ -21,8 +24,6 @@ define i32 @test(i32 %0, i32 %1) { ; CHECK-NEXT: [[SUB120_3:%.*]] = or i32 [[TMP5]], [[DOTNEG_NEG]] ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> , i32 [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = shl <4 x i32> [[TMP10]], -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[ADD110]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[DOTNEG_NEG]], i32 1 ; CHECK-NEXT: [[TMP14:%.*]] = sub <2 x i32> zeroinitializer, [[TMP13]] ; CHECK-NEXT: store <2 x i32> [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @st, i64 32), align 16 ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> , <4 x i32> @@ -65,11 +66,11 @@ define i32 @test1(ptr %0, ptr %1, i32 %2) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[ADD53_1:%.*]] = add i32 [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> , i32 [[ADD53_1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD53_1:%.*]] = add i32 [[TMP4]], [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP7]], splat (i32 1) +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> , i32 [[ADD53_1]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP8]], <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP5]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll index 1c4de256468c..d6edf69882e8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll @@ -7,14 +7,14 @@ define void @rftbsub(ptr %a) { ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 2 ; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 2, 1 ; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[SUB22:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 8 ; CHECK-NEXT: [[ADD16:%.*]] = fadd double [[TMP1]], undef ; CHECK-NEXT: [[MUL18:%.*]] = fmul double undef, [[ADD16]] -; CHECK-NEXT: [[ADD19:%.*]] = fadd double undef, [[MUL18]] -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[ADD19]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[SUB22]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[MUL18]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> undef, [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP7]], <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[ARRAYIDX6]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/small-graph-diff-block-instructions.ll b/llvm/test/Transforms/SLPVectorizer/X86/small-graph-diff-block-instructions.ll index 82c8b1d707cf..62c40cd5810b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/small-graph-diff-block-instructions.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/small-graph-diff-block-instructions.ll @@ -6,12 +6,17 @@ define i32 @test(i32 %arg, i32 %arg1) { ; CHECK-SAME: i32 [[ARG:%.*]], i32 [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[BB:.*:]] ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[ARG1]] to i64 -; CHECK-NEXT: [[ZEXT2:%.*]] = zext i32 [[ARG]] to i64 -; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ARG]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[ARG]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> ; CHECK-NEXT: br label %[[BB3:.*]] ; CHECK: [[BB3]]: ; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ZEXT]] +; CHECK-NEXT: [[ZEXT2:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; CHECK-NEXT: [[GETELEMENTPTR4:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ZEXT2]] +; CHECK-NEXT: [[SEXT:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 ; CHECK-NEXT: [[GETELEMENTPTR5:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[SEXT]] ; CHECK-NEXT: [[ZEXT6:%.*]] = zext i32 0 to i64 ; CHECK-NEXT: [[GETELEMENTPTR7:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ZEXT6]] diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll index b23da5fa263f..39c7602c9582 100644 --- a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll +++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll @@ -32,11 +32,12 @@ define <2 x float> @test_frem(float %a, i1 %cmp) { define <2 x float> @replace_through_casts(i16 %inp) { ; CHECK-LABEL: define <2 x float> @replace_through_casts( ; CHECK-SAME: i16 [[INP:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10 -; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[INP]] to float -; CHECK-NEXT: [[TMP2:%.*]] = sitofp i16 [[ADD]] to float -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i64 0 -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x float> +; CHECK-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x float> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> ; CHECK-NEXT: ret <2 x float> [[R]] ; %add = add nsw i16 %inp, -10 @@ -50,10 +51,11 @@ define <2 x float> @replace_through_casts(i16 %inp) { define <2 x float> @replace_through_casts_and_binop(i16 %inp) { ; CHECK-LABEL: define <2 x float> @replace_through_casts_and_binop( ; CHECK-SAME: i16 [[INP:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i16 [[INP]], 5 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[MUL]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> [[TMP1]], i16 [[ADD]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <2 x i16> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i16> [[TMP6]], +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP7]], <2 x i16> [[TMP8]], <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = uitofp <2 x i16> [[TMP2]] to <2 x float> ; CHECK-NEXT: [[TMP4:%.*]] = sitofp <2 x i16> [[TMP2]] to <2 x float> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x i32> @@ -117,11 +119,12 @@ define <2 x float> @replace_through_casts_through_splat(i16 %inp) { define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) { ; CHECK-LABEL: define <2 x i32> @replace_through_int_casts( ; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10 -; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[INP]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = sext i16 [[ADD]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0 -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i16> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %add = add nsw i16 %inp, -10 @@ -135,10 +138,11 @@ define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) { define <2 x i32> @replace_through_int_casts_ele0_only(i16 %inp, <2 x i16> %dead) { ; CHECK-LABEL: define <2 x i32> @replace_through_int_casts_ele0_only( ; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[INP]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[INP]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0 -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %2 = sext i16 %inp to i32 @@ -171,11 +175,12 @@ define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d, define <2 x i8> @replace_through_binop_preserve_flags(i8 %inp, <2 x i8> %d, <2 x i8> %any) { ; CHECK-LABEL: define <2 x i8> @replace_through_binop_preserve_flags( ; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = xor i8 [[INP]], 5 -; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i8 [[ADD]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i64 0 -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[TMP3]], i8 [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i8> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i8> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i8> [[TMP3]], +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> [[TMP5]], <2 x i32> ; CHECK-NEXT: ret <2 x i8> [[R]] ; %add = xor i8 %inp, 5 diff --git a/llvm/test/Transforms/SLPVectorizer/ext-int-reduced-not-operand.ll b/llvm/test/Transforms/SLPVectorizer/ext-int-reduced-not-operand.ll index d8021538252c..b1c1623b070d 100644 --- a/llvm/test/Transforms/SLPVectorizer/ext-int-reduced-not-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/ext-int-reduced-not-operand.ll @@ -1,29 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s %} -; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999\ -; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED %} ; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s %} -; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-99999\ -; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED %} define i64 @wombat() { -; FORCED-LABEL: define i64 @wombat() { -; FORCED-NEXT: bb: -; FORCED-NEXT: br label [[BB2:%.*]] -; FORCED: bb1: -; FORCED-NEXT: br label [[BB2]] -; FORCED: bb2: -; FORCED-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 0, [[BB1:%.*]] ] -; FORCED-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[PHI]], i32 0 -; FORCED-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i1> -; FORCED-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; FORCED-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i64 -; FORCED-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; FORCED-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i64 -; FORCED-NEXT: [[OR:%.*]] = or i64 [[TMP4]], [[TMP6]] -; FORCED-NEXT: ret i64 [[OR]] -; ; CHECK-LABEL: define i64 @wombat() { ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB2:%.*]] @@ -31,8 +10,13 @@ define i64 @wombat() { ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 0, [[BB1:%.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[PHI]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[PHI]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[PHI]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i64 ; CHECK-NEXT: [[OR:%.*]] = or i64 [[TMP4]], [[TMP6]] ; CHECK-NEXT: ret i64 [[OR]] ;