[SLP] Loop aware cost model/tree building

Currently, the SLP vectorizer does not take loops and their trip counts
into account, which may lead to inefficient vectorization in some cases.
This patch adds loop nest-aware tree building and cost estimation.
When it comes to tree building, it now checks that the tree does not span
across different loop nests. Nodes that belong to another loop nest
immediately become buildvector (gather) nodes.
The cost model adds the knowledge about the loop trip count. If it is
unknown, the default value is used, controlled by the
-slp-cost-loop-trip-count=<value> option. The cost of the vector
nodes in the loop is multiplied by the number of iterations (trip count),
because each vector node will be executed trip-count times.
This allows better cost estimation.

Original Reviewers:
jdenny-ornl, vporpo, hiraditya, RKSimon

Original PR: https://github.com/llvm/llvm-project/pull/150450

Recommit after revert in c7bd3062f1dac975cf9b706f457b3c55b4bf57ff and in 4e500bd0015042b0cd4b7c87b81caeea06072d24

Reviewers: 

Pull Request: https://github.com/llvm/llvm-project/pull/187391
This commit is contained in:
Alexey Bataev 2026-03-18 17:54:01 -04:00 committed by GitHub
parent 89657f726f
commit abdcde9bbc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
35 changed files with 739 additions and 449 deletions

View File

@ -218,6 +218,11 @@ static cl::opt<bool> VectorizeCopyableElements(
cl::desc("Try to replace values with the idempotent instructions for "
"better vectorization."));
// Assumed loop trip count for the loop-aware cost model (default 2).
// getLoopTripCount() returns this value when no trip count is available for a
// loop, and uses it as an upper bound for loops without a single exiting
// block. With a value of 0, getLoopTripCount() returns 1 and getLoopNest()
// returns an empty nest.
static cl::opt<unsigned> LoopAwareTripCount(
"slp-cost-loop-trip-count", cl::init(2), cl::Hidden,
cl::desc("Loop trip count, considered by the cost model during "
"modeling (0=loops are ignored and considered flat code)"));
// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;
@ -2163,6 +2168,7 @@ public:
PostponedGathers.clear();
ValueToGatherNodes.clear();
TreeEntryToStridedPtrInfoMap.clear();
CurrentLoopNest.clear();
}
unsigned getTreeSize() const { return VectorizableTree.size(); }
@ -3765,6 +3771,18 @@ private:
TargetTransformInfo::CastContextHint
getCastContextHint(const TreeEntry &TE) const;
/// \returns the scale of the given tree entry to the loop iteration.
/// \p Scalar is the scalar value from the entry, if using the parent for the
/// external use.
/// \p U is the user of the vectorized value from the entry, if using the
/// parent for the external use.
unsigned getScaleToLoopIterations(const TreeEntry &TE,
Value *Scalar = nullptr,
Instruction *U = nullptr);
/// Get the loop nest for the given loop \p L.
ArrayRef<const Loop *> getLoopNest(const Loop *L);
/// \returns the cost of the vectorizable entry.
InstructionCost getEntryCost(const TreeEntry *E,
ArrayRef<Value *> VectorizedVals,
@ -4144,6 +4162,9 @@ private:
copy(OpVL, Operands[OpIdx].begin());
}
/// Maps values to their lanes in the node.
mutable SmallDenseMap<Value *, unsigned> ValueToLane;
public:
/// Returns interleave factor for interleave nodes.
unsigned getInterleaveFactor() const { return InterleaveFactor; }
@ -4243,7 +4264,10 @@ private:
/// When ReuseReorderShuffleIndices is empty it just returns position of \p
/// V within vector of Scalars. Otherwise, try to remap on its reuse index.
unsigned findLaneForValue(Value *V) const {
unsigned FoundLane = getVectorFactor();
auto Res = ValueToLane.try_emplace(V, getVectorFactor());
if (!Res.second)
return Res.first->second;
unsigned &FoundLane = Res.first->getSecond();
for (auto *It = find(Scalars, V), *End = Scalars.end(); It != End;
std::advance(It, 1)) {
if (*It != V)
@ -4703,6 +4727,17 @@ private:
std::tuple<SmallVector<int>, VectorType *, unsigned, bool>>
CompressEntryToData;
/// The loop nest, used to check if only a single loop nest is vectorized, not
/// multiple, to avoid side-effects from the loop-aware cost model.
SmallVector<const Loop *> CurrentLoopNest;
/// Maps the loops to their loop nests.
SmallDenseMap<const Loop *, SmallVector<const Loop *>> LoopToLoopNest;
/// Maps the loops to their scale factor, which is built as a multiplication
/// of the tripcounts of the loops in the loop nest.
SmallDenseMap<const Loop *, unsigned> LoopToScaleFactor;
/// This POD struct describes one external user in the vectorized tree.
struct ExternalUser {
ExternalUser(Value *S, llvm::User *U, const TreeEntry &E, unsigned L)
@ -10290,6 +10325,39 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
return {IntrinsicCost, LibCost};
}
/// Walks outwards from \p L through its parent loops and returns the first
/// loop in which at least one value of \p VL is not loop-invariant.
/// \returns nullptr if every value in \p VL is invariant in \p L and in all of
/// its parent loops.
static const Loop *findInnermostNonInvariantLoop(const Loop *L,
                                                 ArrayRef<Value *> VL) {
  assert(L && "Expected valid loop");
  for (const Loop *Cur = L; Cur; Cur = Cur->getParentLoop()) {
    // Only instructions are checked against the loop; every other kind of
    // value (constants included) counts as invariant.
    const bool AllInvariant = none_of(VL, [Cur](Value *V) {
      return isa<Instruction>(V) && !Cur->isLoopInvariant(V);
    });
    if (!AllInvariant)
      return Cur;
  }
  return nullptr;
}
/// Returns the chain of loops enclosing \p L, ordered from the outermost loop
/// down to \p L itself. The chain is built on first request and stored in
/// LoopToLoopNest for later calls.
/// \returns an empty range when LoopAwareTripCount is 0.
ArrayRef<const Loop *> BoUpSLP::getLoopNest(const Loop *L) {
  assert(L && "Expected valid loop");
  if (LoopAwareTripCount == 0)
    return {};
  SmallVector<const Loop *> &Nest =
      LoopToLoopNest.try_emplace(L).first->getSecond();
  if (Nest.empty()) {
    // Collect from L outwards, then store the chain outermost-first.
    SmallVector<const Loop *> InnerToOuter;
    for (const Loop *Cur = L; Cur; Cur = Cur->getParentLoop())
      InnerToOuter.push_back(Cur);
    Nest.assign(InnerToOuter.rbegin(), InnerToOuter.rend());
  }
  return Nest;
}
BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
const InstructionsState &S, ArrayRef<Value *> VL,
bool IsScatterVectorizeUserTE, OrdersType &CurrentOrder,
@ -11896,6 +11964,44 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
return;
}
// Check the loop nest. We need to be sure we handle a single loop nest at a
// time to avoid incorrect cost estimation because of the loop aware cost
// model.
if (VectorizableTree.empty()) {
assert(CurrentLoopNest.empty() && "Expected empty loop nest");
// Process the first node? Initial fill of the loop nest.
BasicBlock *Parent = S.getMainOp()->getParent();
if (const Loop *L = LI->getLoopFor(Parent)) {
L = findInnermostNonInvariantLoop(L, VL);
if (L)
CurrentLoopNest.assign(getLoopNest(L));
}
} else if (!UserTreeIdx ||
UserTreeIdx.UserTE->State == TreeEntry::SplitVectorize ||
UserTreeIdx.UserTE->isGather() ||
UserTreeIdx.UserTE->getMainOp()->getParent() !=
S.getMainOp()->getParent()) {
BasicBlock *Parent = S.getMainOp()->getParent();
if (const Loop *L = LI->getLoopFor(Parent)) {
// Check that the new loop nest is not involved.
// Otherwise, mark it as a gather node.
L = findInnermostNonInvariantLoop(L, VL);
if (L) {
SmallVector<const Loop *> NewLoopNest(getLoopNest(L));
for (const auto [L1, L2] : zip(CurrentLoopNest, NewLoopNest)) {
if (L1 != L2) {
LLVM_DEBUG(dbgs() << "SLP: Different loop nest.\n");
newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
return;
}
}
if (NewLoopNest.size() > CurrentLoopNest.size())
CurrentLoopNest.append(std::next(NewLoopNest.begin(), CurrentLoopNest.size()),
NewLoopNest.end());
}
}
}
Instruction *VL0 = S.getMainOp();
BasicBlock *BB = VL0->getParent();
auto &BSRef = BlocksSchedules[BB];
@ -15080,6 +15186,78 @@ TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
return TTI::CastContextHint::None;
}
/// Returns the trip count the cost model assumes for the loop \p L.
/// The value is taken, in order of preference, from
/// SE.getSmallConstantTripCount(), from getLoopEstimatedTripCount(), or from
/// LoopAwareTripCount. Always returns 1 when LoopAwareTripCount is 0.
static unsigned getLoopTripCount(const Loop *L, ScalarEvolution &SE) {
  const unsigned DefaultTripCount = LoopAwareTripCount;
  if (DefaultTripCount == 0)
    return 1;
  unsigned TripCount = SE.getSmallConstantTripCount(L);
  if (TripCount == 0)
    TripCount = getLoopEstimatedTripCount(const_cast<Loop *>(L)).value_or(0);
  // Nothing is known about the loop - fall back to the default.
  if (TripCount == 0)
    return DefaultTripCount;
  // A loop with a single exiting block uses the found trip count as is.
  if (L->getExitingBlock())
    return TripCount;
  // Multiple exiting blocks - the loop can be terminated early, so do not
  // assume more iterations than the default.
  return std::min<unsigned>(DefaultTripCount, TripCount);
}
/// Computes the factor by which a cost related to \p TE is multiplied to
/// account for the loops around the block where that cost is paid.
///
/// The block is chosen as follows:
///  - the parent block of \p U, if \p U is given;
///  - for gather and SplitVectorize entries, the block of the first user
///    entry (following UserTreeIndex) that is neither a gather nor a
///    SplitVectorize node; for a vectorized PHI user this is the incoming
///    block of the corresponding edge;
///  - otherwise the parent block of the main instruction of \p TE.
///
/// \p Scalar, if given, replaces TE.Scalars in the loop-invariance check.
/// \returns 1 if no block could be determined or the block is not in a loop,
/// otherwise the scale computed from the loop trip counts (cached per loop in
/// LoopToScaleFactor).
unsigned BoUpSLP::getScaleToLoopIterations(const TreeEntry &TE, Value *Scalar,
Instruction *U) {
BasicBlock *Parent = nullptr;
if (U) {
// External user - the cost is paid in the user's block.
Parent = U->getParent();
} else if (TE.isGather() || TE.State == TreeEntry::SplitVectorize) {
// Walk up the chain of users, skipping gather/split nodes, to find the
// node that determines the block.
EdgeInfo EI = TE.UserTreeIndex;
while (EI.UserTE) {
if (EI.UserTE->isGather() ||
EI.UserTE->State == TreeEntry::SplitVectorize) {
EI = EI.UserTE->UserTreeIndex;
continue;
}
if (EI.UserTE->State == TreeEntry::Vectorize &&
EI.UserTE->getOpcode() == Instruction::PHI) {
// For a PHI user take the incoming block for this operand rather than
// the block of the PHI itself.
auto *PH = cast<PHINode>(EI.UserTE->getMainOp());
Parent = PH->getIncomingBlock(EI.EdgeIdx);
} else {
Parent = EI.UserTE->getMainOp()->getParent();
}
break;
}
// No suitable user was found - do not scale.
if (!Parent)
return 1;
} else {
Parent = TE.getMainOp()->getParent();
}
if (const Loop *L = LI->getLoopFor(Parent)) {
// NOTE(review): the cache is keyed by the loop only, while the value
// computed below also depends on the scalars passed to
// findInnermostNonInvariantLoop - confirm that sharing the cached value
// between entries with different scalars is intended.
const auto It = LoopToScaleFactor.find(L);
if (It != LoopToScaleFactor.end())
return It->second;
// Stays 1 if all the scalars are invariant in L and all its parent loops.
unsigned Scale = 1;
if (const Loop *NonInvL = findInnermostNonInvariantLoop(
L, Scalar ? ArrayRef(Scalar) : ArrayRef(TE.Scalars))) {
Scale = getLoopTripCount(NonInvL, *SE);
// Visit the nest returned by getLoopNest (outermost loop first), stopping
// at L or at the first loop that already has a cached factor.
// NOTE(review): when NonInvL is a parent of L rather than L itself, L is
// not part of this nest, so the walk also visits NonInvL after Scale was
// seeded with its trip count - confirm this is intended.
// NOTE(review): the products are plain unsigned multiplications and can
// wrap for large trip counts - confirm this is acceptable.
for (const Loop *LN : getLoopNest(NonInvL)) {
if (LN == L)
break;
// 0 is a placeholder, it is overwritten below for newly inserted loops.
auto LNRes = LoopToScaleFactor.try_emplace(LN, 0);
auto &LoopScale = LNRes.first->getSecond();
if (!LNRes.second) {
// The factor for this loop is already known - reuse it and stop.
Scale *= LoopScale;
break;
}
Scale *= getLoopTripCount(LN, *SE);
LoopScale = Scale;
}
}
LoopToScaleFactor.try_emplace(L, Scale);
return Scale;
}
// The block is not inside of any loop.
return 1;
}
InstructionCost
BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
SmallPtrSetImpl<Value *> &CheckedExtracts) {
@ -16292,11 +16470,24 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
allConstant(VectorizableTree[1]->Scalars))))
return true;
// The tree with only 3 nodes, where 2 last are gathers/buildvectors, not
// profitable for vectorization.
constexpr int Limit = 4;
if (VectorizableTree.size() == 3 && SLPCostThreshold == 0 &&
(!ForReduction || VectorizableTree.front()->getVectorFactor() <= 2) &&
all_of(ArrayRef(VectorizableTree).drop_front(),
[&](const std::unique_ptr<TreeEntry> &TE) {
return TE->isGather() && TE->getVectorFactor() <= Limit &&
!all_of(
TE->Scalars,
IsaPred<ExtractElementInst, UndefValue, Constant>);
}))
return true;
// If the graph includes only PHI nodes and gathers, it is defnitely not
// profitable for the vectorization, we can skip it, if the cost threshold is
// default. The cost of vectorized PHI nodes is almost always 0 + the cost of
// gathers/buildvectors.
constexpr int Limit = 4;
if (!ForReduction && !SLPCostThreshold.getNumOccurrences() &&
!VectorizableTree.empty() &&
all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
@ -16332,6 +16523,51 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
}))
return true;
// PHI nodes only and gathers cannot be vectorized, skip.
constexpr unsigned LargeTree = 20;
bool HasSingleLoad = false;
if (!ForReduction && SLPCostThreshold >= 0 &&
all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
bool PrevLoad = HasSingleLoad;
HasSingleLoad |=
TE->hasState() && !TE->isGather() &&
(TE->getOpcode() == Instruction::Load ||
TE->hasCopyableElements()) &&
(TE->getVectorFactor() > 2 || TE->ReorderIndices.empty());
return (TE->hasState() &&
(TE->getOpcode() == Instruction::PHI ||
(VectorizableTree.size() >= LargeTree &&
(TE->getOpcode() == Instruction::Store ||
(TE->getOpcode() == Instruction::Load && !PrevLoad)) &&
TE->getVectorFactor() <= Limit))) ||
(TE->isGather() &&
(!TE->hasState() ||
TE->getOpcode() != Instruction::ExtractElement));
}))
return true;
// Single non-phi vector node - skip the tree.
bool VectorNodeFound = false;
bool AnyNonConst = false;
if (!ForReduction && SLPCostThreshold >= 0 && VectorizableTree.size() >= 5 &&
VectorizableTree.front()->getVectorFactor() <= 2 &&
VectorizableTree.front()->Scalars.front()->getType()->isIntegerTy() &&
all_of(VectorizableTree,
[&](const std::unique_ptr<TreeEntry> &TE) {
if (TE->State == TreeEntry::Vectorize && TE->hasState()) {
if (TE->hasState() && (TE->getOpcode() == Instruction::PHI ||
!TE->ReorderIndices.empty()))
return true;
bool PrevVectorNodeFound = VectorNodeFound;
VectorNodeFound = true;
return !PrevVectorNodeFound;
}
AnyNonConst |= !allConstant(TE->Scalars);
return TE->isGather() || TE->State == TreeEntry::SplitVectorize;
}) &&
AnyNonConst)
return true;
// If the tree contains only phis, buildvectors, split nodes and
// small nodes with reuses, we can skip it.
SmallVector<const TreeEntry *> StoreLoadNodes;
@ -16586,10 +16822,14 @@ InstructionCost BoUpSLP::getSpillCost() {
if (It != MinBWs.end())
ScalarTy = IntegerType::get(ScalarTy->getContext(), It->second.first);
auto *VecTy = getWidenedType(ScalarTy, Op->getVectorFactor());
Cost += TTI->getCostOfKeepingLiveOverCall(VecTy);
unsigned Scale = getScaleToLoopIterations(*Op);
InstructionCost KeepLiveCost = TTI->getCostOfKeepingLiveOverCall(VecTy);
KeepLiveCost *= Scale;
Cost += KeepLiveCost;
if (ScalarTy->isVectorTy()) {
// Handle revec dead vector instructions.
Cost -= Op->Scalars.size() * TTI->getCostOfKeepingLiveOverCall(ScalarTy);
Cost -= Op->Scalars.size() * TTI->getCostOfKeepingLiveOverCall(ScalarTy) *
Scale;
}
};
// Memoize the relationship between blocks, i.e. if there is (at least one)
@ -16946,6 +17186,9 @@ InstructionCost BoUpSLP::calculateTreeCostAndTrimNonProfitable(
};
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Cost = 0;
SmallDenseMap<const TreeEntry *, unsigned> EntryToScale;
unsigned PrevScale = 0;
BasicBlock *PrevVecParent = nullptr;
for (const std::unique_ptr<TreeEntry> &Ptr : VectorizableTree) {
TreeEntry &TE = *Ptr;
// No need to count the cost for combined entries, they are combined and
@ -16979,6 +17222,24 @@ InstructionCost BoUpSLP::calculateTreeCostAndTrimNonProfitable(
"Expected gather nodes with users only.");
InstructionCost C = getEntryCost(&TE, VectorizedVals, CheckedExtracts);
unsigned Scale = 0;
bool CostIsFree = C == 0;
if (!CostIsFree && !TE.isGather() && TE.hasState()) {
if (PrevVecParent == TE.getMainOp()->getParent()) {
Scale = PrevScale;
C *= Scale;
EntryToScale.try_emplace(&TE, Scale);
}
}
if (!CostIsFree && !Scale) {
Scale = getScaleToLoopIterations(TE);
C *= Scale;
EntryToScale.try_emplace(&TE, Scale);
if (!TE.isGather() && TE.hasState()) {
PrevVecParent = TE.getMainOp()->getParent();
PrevScale = Scale;
}
}
Cost += C;
NodesCosts.try_emplace(&TE, C);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle "
@ -17006,6 +17267,12 @@ InstructionCost BoUpSLP::calculateTreeCostAndTrimNonProfitable(
InstructionCost ExtCost = ::getScalarizationOverhead(
*TTI, ScalarTy, VecTy, DemandedElts, /*Insert=*/false,
/*Extract=*/true, CostKind);
if (ExtCost.isValid() && ExtCost != 0) {
if (!Scale)
Scale = getScaleToLoopIterations(TE);
ExtCost *= Scale;
EntryToScale.try_emplace(&TE, Scale);
}
ExtractCosts.try_emplace(&TE, ExtCost);
}
}
@ -17305,6 +17572,14 @@ InstructionCost BoUpSLP::calculateTreeCostAndTrimNonProfitable(
if (!NodesCosts.contains(TE.get())) {
InstructionCost C =
getEntryCost(TE.get(), VectorizedVals, CheckedExtracts);
if (!C.isValid() || C == 0) {
NodesCosts.try_emplace(TE.get(), C);
continue;
}
unsigned Scale = EntryToScale.lookup(TE.get());
if (!Scale)
Scale = getScaleToLoopIterations(*TE.get());
C *= Scale;
NodesCosts.try_emplace(TE.get(), C);
}
}
@ -17356,7 +17631,39 @@ template <typename T> struct ShuffledInsertData {
InstructionCost BoUpSLP::getTreeCost(InstructionCost TreeCost,
ArrayRef<Value *> VectorizedVals,
InstructionCost ReductionCost) {
InstructionCost Cost = TreeCost + ReductionCost;
InstructionCost Cost = TreeCost;
SmallDenseMap<const TreeEntry *, unsigned> EntryToScale;
auto ScaleCost = [&](InstructionCost C, const TreeEntry &TE,
Value *Scalar = nullptr, Instruction *U = nullptr) {
if (!C.isValid() || C == 0)
return C;
unsigned &Scale = EntryToScale.try_emplace(&TE, 0).first->getSecond();
if (!Scale)
Scale = getScaleToLoopIterations(TE, Scalar, U);
return C * Scale;
};
Instruction *ReductionRoot = nullptr;
if (UserIgnoreList) {
const auto It = find_if(*UserIgnoreList, IsaPred<Instruction>);
assert(It != UserIgnoreList->end() && "Expected reduction instruction.");
ReductionRoot = cast<Instruction>(*It);
// Scale reduction cost to the factor of the loop nest trip count.
ReductionCost = ScaleCost(ReductionCost, *VectorizableTree.front().get(),
/*Scalar=*/nullptr, ReductionRoot);
}
// Add the cost for reduction.
Cost += ReductionCost;
// Skip trees, which are non-profitable even if there are insertelements with
// external uses.
constexpr unsigned CostLimit = 100;
if (Cost >= -SLPCostThreshold + CostLimit &&
(VectorizableTree.size() - DeletedNodes.size()) *
VectorizableTree.front()->getVectorFactor() <
CostLimit)
return Cost;
if (Cost >= -SLPCostThreshold &&
none_of(ExternalUses, [](const ExternalUser &EU) {
@ -17647,6 +17954,9 @@ InstructionCost BoUpSLP::getTreeCost(InstructionCost TreeCost,
}
}
ExtraCost = ScaleCost(ExtraCost, *Entry, EU.Scalar,
cast_or_null<Instruction>(EU.User));
ExtractCost += ExtraCost;
}
// Insert externals for extract of operands of casts to be emitted as scalars
@ -17683,9 +17993,12 @@ InstructionCost BoUpSLP::getTreeCost(InstructionCost TreeCost,
assert(SLPReVec && "Only supported by REVEC.");
SrcTy = getWidenedType(SrcTy, VecTy->getNumElements());
}
Cost += TTI->getCastInstrCost(Opcode, DstTy, SrcTy,
TTI::CastContextHint::None,
TTI::TCK_RecipThroughput);
InstructionCost CastCost =
TTI->getCastInstrCost(Opcode, DstTy, SrcTy,
TTI::CastContextHint::None,
TTI::TCK_RecipThroughput);
CastCost = ScaleCost(CastCost, Root, /*Scalar=*/nullptr, ReductionRoot);
Cost += CastCost;
}
}
}
@ -17768,6 +18081,7 @@ InstructionCost BoUpSLP::getTreeCost(InstructionCost TreeCost,
})) {
InstructionCost C =
::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, FTy, Mask);
C = ScaleCost(C, *TEs.front());
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of insertelement "
"external users.\n";
@ -17786,6 +18100,7 @@ InstructionCost BoUpSLP::getTreeCost(InstructionCost TreeCost,
auto *FTy = getWidenedType(TEs.back()->Scalars.front()->getType(), VF);
InstructionCost C =
::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, FTy, Mask);
C = ScaleCost(C, *TEs.back());
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of vector node and external "
"insertelement users.\n";
@ -17839,7 +18154,6 @@ InstructionCost BoUpSLP::getTreeCost(InstructionCost TreeCost,
auto *DstVecTy =
getWidenedType(Builder.getIntNTy(DstSize), E.getVectorFactor());
TTI::CastContextHint CCH = getCastContextHint(E);
InstructionCost CastCost;
switch (E.getOpcode()) {
case Instruction::SExt:
case Instruction::ZExt:
@ -17851,8 +18165,11 @@ InstructionCost BoUpSLP::getTreeCost(InstructionCost TreeCost,
default:
break;
}
CastCost += TTI->getCastInstrCost(Opcode, DstVecTy, SrcVecTy, CCH,
TTI::TCK_RecipThroughput);
InstructionCost CastCost =
TTI->getCastInstrCost(Opcode, DstVecTy, SrcVecTy, CCH,
TTI::TCK_RecipThroughput);
CastCost = ScaleCost(CastCost, *VectorizableTree.front().get(),
/*Scalar=*/nullptr, ReductionRoot);
Cost += CastCost;
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << CastCost
<< " for final resize for reduction from " << SrcVecTy
@ -17876,8 +18193,10 @@ InstructionCost BoUpSLP::getTreeCost(InstructionCost TreeCost,
OS << *SpillCost;
else
OS << "<skipped>";
OS << ".\nSLP: Extract Cost = " << ExtractCost << ".\n"
<< "SLP: Total Cost = " << Cost << ".\n";
OS << ".\nSLP: Extract Cost = " << ExtractCost << ".\n";
if (ReductionRoot)
OS << "SLP: Reduction Cost = " << ReductionCost << ".\n";
OS << "SLP: Total Cost = " << Cost << ".\n";
}
LLVM_DEBUG(dbgs() << Str);
if (ViewSLPTree)
@ -19143,6 +19462,10 @@ Value *BoUpSLP::gather(
auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos,
Type *Ty) {
Value *Scalar = V;
// Drop NUW from trunc to avoid incorrect codegen.
Value *Trunced;
if (match(Scalar, m_NUWTrunc(m_Value(Trunced))))
cast<TruncInst>(Scalar)->setHasNoUnsignedWrap(/*B=*/false);
if (Scalar->getType() != Ty) {
assert(Scalar->getType()->isIntOrIntVectorTy() &&
Ty->isIntOrIntVectorTy() && "Expected integer types only.");
@ -20927,11 +21250,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
MinBWs.contains(getOperandEntry(E, 0)) ||
MinBWs.contains(getOperandEntry(E, 1))) &&
"Expected item in MinBWs.");
if (cast<VectorType>(L->getType())
->getElementType()
->getIntegerBitWidth() < cast<VectorType>(R->getType())
->getElementType()
->getIntegerBitWidth()) {
const unsigned LBW = cast<VectorType>(L->getType())
->getElementType()
->getIntegerBitWidth();
const unsigned RBW = cast<VectorType>(R->getType())
->getElementType()
->getIntegerBitWidth();
if ((LBW < RBW && !allConstant(E->getOperand(1))) ||
(LBW > RBW && allConstant(E->getOperand(0)))) {
Type *CastTy = R->getType();
L = Builder.CreateIntCast(L, CastTy, GetOperandSignedness(0));
} else {
@ -23644,8 +23970,7 @@ bool BoUpSLP::collectValuesToDemote(
if (Res && E.isGather()) {
if (E.hasState()) {
if (const TreeEntry *SameTE =
getSameValuesTreeEntry(E.getMainOp(), E.Scalars);
SameTE)
getSameValuesTreeEntry(E.getMainOp(), E.Scalars))
if (collectValuesToDemote(*SameTE, IsProfitableToDemoteRoot, BitWidth,
ToDemote, Visited, NodesToKeepBWs,
MaxDepthLevel, IsProfitableToDemote,
@ -26244,7 +26569,8 @@ public:
ReductionCost = 0;
else
ReductionCost =
getReductionCost(TTI, VL, IsCmpSelMinMax, RdxFMF, V, DT, DL, TLI);
getReductionCost(TTI, VL, SameValuesCounter, IsCmpSelMinMax,
RdxFMF, V, DT, DL, TLI);
InstructionCost Cost = V.getTreeCost(TreeCost, VL, ReductionCost);
LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost
<< " for reduction\n");
@ -26564,12 +26890,11 @@ private:
}
/// Calculate the cost of a reduction.
InstructionCost getReductionCost(TargetTransformInfo *TTI,
ArrayRef<Value *> ReducedVals,
bool IsCmpSelMinMax, FastMathFlags FMF,
const BoUpSLP &R, DominatorTree &DT,
const DataLayout &DL,
const TargetLibraryInfo &TLI) {
InstructionCost getReductionCost(
TargetTransformInfo *TTI, ArrayRef<Value *> ReducedVals,
const SmallMapVector<Value *, unsigned, 16> SameValuesCounter,
bool IsCmpSelMinMax, FastMathFlags FMF, const BoUpSLP &R,
DominatorTree &DT, const DataLayout &DL, const TargetLibraryInfo &TLI) {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Type *ScalarTy = ReducedVals.front()->getType();
unsigned ReduxWidth = ReducedVals.size();
@ -26585,11 +26910,15 @@ private:
for (Value *RdxVal : ReducedVals) {
if (!isa<Instruction>(RdxVal))
continue;
if (Cnt == 1)
if (Cnt == 1) {
unsigned SameValueCount = SameValuesCounter.lookup(RdxVal);
Cost += (SameValueCount ? SameValueCount - 1 : 0) * GenCostFn();
break;
}
--Cnt;
if (RdxVal->hasNUsesOrMore(IsCmpSelMinMax ? 3 : 2)) {
Cost += GenCostFn();
unsigned SameValueCount = SameValuesCounter.lookup(RdxVal);
Cost += (SameValueCount ? SameValueCount : 1) * GenCostFn();
continue;
}
InstructionCost ScalarCost = 0;

View File

@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer -slp-threshold=-12 -pass-remarks-output=%t < %s | FileCheck %s
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer -slp-threshold=-13 -pass-remarks-output=%t < %s | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=YAML %s
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer -slp-threshold=-12 -pass-remarks-output=%t < %s | FileCheck %s
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer -slp-threshold=-13 -pass-remarks-output=%t < %s | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=YAML %s
; These tests check that we remove from consideration pairs of seed
@ -22,13 +22,13 @@
; YAML-LABEL: Function: getelementptr_4x32
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Name: VectorizedHorizontalReduction
; YAML-NEXT: Function: getelementptr_4x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
; YAML-NEXT: - Cost: '6'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '3'
; YAML-NEXT: - TreeSize: '1'
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
@ -36,7 +36,7 @@
; YAML-NEXT: Function: getelementptr_4x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '8'
; YAML-NEXT: - Cost: '12'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '3'
@ -47,43 +47,38 @@ define i32 @getelementptr_4x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y
; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[X:%.*]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[Z:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: [[ADD16:%.*]] = extractelement <2 x i32> [[TMP17:%.*]], i32 0
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD16]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD16:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP17]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUM_032:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD16]], [[FOR_BODY]] ]
; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP15]], 1
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP0]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP0]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i32 [[TMP12]]
; CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP16]]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[TMP11]]
; CHECK-NEXT: [[T8:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
; CHECK-NEXT: [[TMP18:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP18]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[T4]], [[Y:%.*]]
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[TMP13]]
; CHECK-NEXT: [[T10:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP18]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = add nsw i32 [[T4]], [[Z:%.*]]
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[TMP14]]
; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[ADD11]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[T12]], i32 0
; CHECK-NEXT: [[TMP17]] = add nsw <2 x i32> [[TMP19]], [[TMP20]]
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[T6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T8]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T10]], i32 2
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[T12]], i32 3
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
; CHECK-NEXT: [[ADD16]] = add i32 [[TMP10]], [[SUM_032]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[TMP15]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
;
@ -133,7 +128,7 @@ for.body:
; YAML: Function: getelementptr_2x32
; YAML: Args:
; YAML: - String: 'SLP vectorized with cost '
; YAML: - Cost: '8'
; YAML: - Cost: '12'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '3'
@ -143,42 +138,36 @@ define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y
; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[Z:%.*]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[Y:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: [[OP_RDX:%.*]] = extractelement <2 x i32> [[TMP13:%.*]], i32 0
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP13]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUM_032:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[OP_RDX]], [[FOR_BODY]] ]
; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP12]], 1
; CHECK-NEXT: [[T5:%.*]] = add nsw i32 [[T4]], 0
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i32 [[T5]]
; CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP5]]
; CHECK-NEXT: [[T7:%.*]] = add nsw i32 [[T4]], 1
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[T7]]
; CHECK-NEXT: [[T8:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP0]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[TMP9]]
; CHECK-NEXT: [[T10:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i32 [[TMP9]]
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX11]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[TMP6]]
; CHECK-NEXT: [[T10:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = add nsw i32 [[T4]], [[Z:%.*]]
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[TMP10]]
; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[ADD11]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[T12]], i32 0
; CHECK-NEXT: [[TMP13]] = add nsw <2 x i32> [[TMP11]], [[TMP14]]
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[T10]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T12]], i32 3
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP13]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP14]])
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP11]], [[SUM_032]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[TMP12]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
;

View File

@ -15,7 +15,7 @@ target triple = "aarch64--linux"
; YAML-NEXT: Function: test_select
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
; YAML-NEXT: - Cost: '-19'
; YAML-NEXT: - Cost: '-38'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '10'
@ -135,7 +135,7 @@ define i32 @reduction_with_br(ptr noalias nocapture readonly %blk1, ptr noalias
; YAML-NEXT: Function: reduction_with_br
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
; YAML-NEXT: - Cost: '-10'
; YAML-NEXT: - Cost: '-20'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '3'
; CHECK-LABEL: @reduction_with_br(
@ -228,7 +228,7 @@ for.end: ; preds = %for.end.loopexit, %
; YAML-NEXT: Function: test_unrolled_select
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
; YAML-NEXT: - Cost: '-44'
; YAML-NEXT: - Cost: '-88'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '12'
@ -250,8 +250,7 @@ define i32 @test_unrolled_select(ptr noalias nocapture readonly %blk1, ptr noali
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[P2_045]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i16> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i16> [[TMP4]] to <8 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <8 x i16> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = sub <8 x i16> zeroinitializer, [[TMP4]]
; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP7]], <8 x i16> [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = sext <8 x i16> [[TMP8]] to <8 x i32>

View File

@ -8,7 +8,7 @@
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '2'
; YAML-NEXT: - Cost: '4'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'

View File

@ -9,8 +9,7 @@ define i1 @test(i32 %shr.i.i90, i32 %x) {
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[SHR_I_I90]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> [[TMP6]], <i32 2, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP1]], i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt <2 x i64> [[TMP3]], <i64 100, i64 300>
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt <2 x i32> [[TMP2]], <i32 100, i32 300>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; CHECK-NEXT: ret i1 [[TMP5]]
;

View File

@ -37,29 +37,27 @@ define void @test() {
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I69]], i32 15
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
; CHECK: [[BB77]]:
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP13]], <8 x i32> <i32 8, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[I68]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[I66]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x float> poison, float [[I66]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x float> [[TMP12]], float [[I70]], i32 2
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[I68]], i32 3
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[I69]], i32 4
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP15]], float [[I67]], i32 7
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 3, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP20]], <16 x float> [[TMP0]], <16 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 18, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 19, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[BB78:.*]]
; CHECK: [[BB78]]:
; CHECK-NEXT: [[TMP22:%.*]] = phi <8 x float> [ [[TMP14]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP32:%.*]] = phi <2 x float> [ [[TMP16]], %[[BB77]] ], [ [[TMP37:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x float> [[TMP22]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 1, i32 3, i32 5, i32 3, i32 1, i32 0, i32 4, i32 5, i32 5>
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <8 x float> [[TMP22]], <8 x float> poison, <8 x i32> <i32 2, i32 poison, i32 0, i32 poison, i32 5, i32 4, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x float> [[TMP32]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <8 x float> [[TMP38]], <8 x float> [[TMP23]], <8 x i32> <i32 0, i32 9, i32 2, i32 8, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x float> [[TMP41]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 1, i32 3, i32 5, i32 3, i32 1, i32 0, i32 4, i32 5, i32 5>
; CHECK-NEXT: [[TMP19:%.*]] = phi <8 x float> [ [[TMP16]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP19]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP22]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 1, i32 3, i32 5, i32 3, i32 1, i32 0, i32 4, i32 5, i32 5>
; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP22]], <16 x float> poison, <8 x i32> <i32 2, i32 poison, i32 0, i32 poison, i32 5, i32 4, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <8 x float> [[TMP23]], <8 x float> [[TMP19]], <8 x i32> <i32 0, i32 13, i32 2, i32 14, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x float> [[TMP32]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 1, i32 3, i32 5, i32 3, i32 1, i32 0, i32 4, i32 5, i32 5>
; CHECK-NEXT: [[TMP27:%.*]] = fmul fast <16 x float> [[TMP25]], [[TMP0]]
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], [[TMP26]]
; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
; CHECK-NEXT: [[TMP30:%.*]] = fadd fast <16 x float> [[TMP29]], poison
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP30]], <16 x float> poison, <8 x i32> <i32 5, i32 11, i32 12, i32 10, i32 14, i32 15, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP37]] = shufflevector <16 x float> [[TMP30]], <16 x float> poison, <2 x i32> <i32 6, i32 7>
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP30]], <16 x float> poison, <8 x i32> <i32 12, i32 7, i32 5, i32 6, i32 15, i32 11, i32 10, i32 14>
; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
; CHECK: [[BB167]]:
; CHECK-NEXT: [[TMP35:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP30]], %[[BB78]] ]

View File

@ -216,8 +216,8 @@ define void @slp_profitable_missing_fmf_nnans_only(ptr %A, ptr %B) {
define float @slp_not_profitable_in_loop(float %x, ptr %A) {
; CHECK-LABEL: @slp_not_profitable_in_loop(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 2
; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_A_2]], align 4
; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 2
; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[A1]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 4
; CHECK-NEXT: [[L_3:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> <float poison, float 3.000000e+00>, float [[X:%.*]], i32 0
@ -226,7 +226,7 @@ define float @slp_not_profitable_in_loop(float %x, ptr %A) {
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[RED_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[MUL12:%.*]] = fmul fast float 3.000000e+00, [[L_1]]
; CHECK-NEXT: [[MUL12:%.*]] = fmul fast float 3.000000e+00, [[L_2]]
; CHECK-NEXT: [[MUL16:%.*]] = fmul fast float 3.000000e+00, [[L_3]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[MUL12]], [[TMP3]]

View File

@ -9,7 +9,7 @@ define dso_local void @l(i1 %arg) local_unnamed_addr {
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP9:%.*]], [[BB25:%.*]] ]
; CHECK-NEXT: br i1 %arg, label [[BB3:%.*]], label [[BB11:%.*]]
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB3:%.*]], label [[BB11:%.*]]
; CHECK: bb3:
; CHECK-NEXT: [[I4:%.*]] = zext i1 undef to i32
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef
@ -18,8 +18,7 @@ define dso_local void @l(i1 %arg) local_unnamed_addr {
; CHECK: bb11:
; CHECK-NEXT: [[I12:%.*]] = zext i1 undef to i32
; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i16> [[TMP0]], undef
; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i16> [[TMP3]] to <2 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> undef, [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <2 x i16> undef, [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i32>
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <2 x i32> undef, [[TMP6]]
; CHECK-NEXT: br label [[BB25]]
@ -34,7 +33,7 @@ define dso_local void @l(i1 %arg) local_unnamed_addr {
; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
; CHECK-NEXT: [[I32:%.*]] = and i32 [[I31]], [[TMP13]]
; CHECK-NEXT: [[I33:%.*]] = and i32 [[I32]], [[I28]]
; CHECK-NEXT: br i1 %arg, label [[BB34:%.*]], label [[BB1]]
; CHECK-NEXT: br i1 [[ARG]], label [[BB34:%.*]], label [[BB1]]
; CHECK: bb34:
; CHECK-NEXT: [[I35:%.*]] = phi i32 [ [[I33]], [[BB25]] ]
; CHECK-NEXT: br label [[BB36:%.*]]

View File

@ -10,7 +10,7 @@ define void @test(ptr %__last.addr.011.i.i, ptr %call3) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> [[TMP0]], ptr [[CALL3]], i32 1
; CHECK-NEXT: br label %[[WHILE_BODY_I_I:.*]]
; CHECK: [[WHILE_BODY_I_I]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x ptr> [ [[TMP1]], %[[NEWFUNCROOT]] ], [ [[TMP94:%.*]], %[[WHILE_BODY_I_I]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x ptr> [ [[TMP1]], %[[NEWFUNCROOT]] ], [ [[TMP76:%.*]], %[[WHILE_BODY_I_I]] ]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 0
@ -137,102 +137,79 @@ define void @test(ptr %__last.addr.011.i.i, ptr %call3) {
; CHECK-NEXT: [[TMP87:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_19]], align 4
; CHECK-NEXT: store float [[TMP87]], ptr [[INCDEC_PTR2_I_I_19]], align 4
; CHECK-NEXT: store float [[TMP86]], ptr [[__LAST_ADDR_0_I_I_19]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_20:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_19]], i32 -4
; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x ptr> poison, ptr [[INCDEC_PTR2_I_I_19]], i32 0
; CHECK-NEXT: [[TMP85:%.*]] = insertelement <2 x ptr> [[TMP43]], ptr [[__LAST_ADDR_0_I_I_19]], i32 1
; CHECK-NEXT: [[TMP100:%.*]] = getelementptr i8, <2 x ptr> [[TMP85]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_20:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_19]], i32 4
; CHECK-NEXT: [[TMP101:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_20]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_30:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_19]], i32 -4
; CHECK-NEXT: [[INCDEC_PTR2_I_I_30:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_19]], i32 4
; CHECK-NEXT: [[TMP47:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_30]], align 4
; CHECK-NEXT: [[TMP48:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_30]], align 4
; CHECK-NEXT: store float [[TMP48]], ptr [[INCDEC_PTR2_I_I_30]], align 4
; CHECK-NEXT: store float [[TMP47]], ptr [[__LAST_ADDR_0_I_I_30]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_21:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_30]], i32 -4
; CHECK-NEXT: [[INCDEC_PTR2_I_I_21:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_30]], i32 4
; CHECK-NEXT: [[TMP49:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_21]], align 4
; CHECK-NEXT: [[TMP50:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_21]], align 4
; CHECK-NEXT: store float [[TMP50]], ptr [[INCDEC_PTR2_I_I_21]], align 4
; CHECK-NEXT: store float [[TMP49]], ptr [[__LAST_ADDR_0_I_I_21]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_22:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_21]], i32 -4
; CHECK-NEXT: [[INCDEC_PTR2_I_I_22:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_21]], i32 4
; CHECK-NEXT: [[TMP51:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_22]], align 4
; CHECK-NEXT: [[TMP67:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_22]], align 4
; CHECK-NEXT: store float [[TMP67]], ptr [[INCDEC_PTR2_I_I_22]], align 4
; CHECK-NEXT: store float [[TMP51]], ptr [[__LAST_ADDR_0_I_I_22]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_23:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_22]], i32 -4
; CHECK-NEXT: [[INCDEC_PTR2_I_I_23:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_22]], i32 4
; CHECK-NEXT: [[TMP70:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_23]], align 4
; CHECK-NEXT: [[TMP54:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_23]], align 4
; CHECK-NEXT: store float [[TMP54]], ptr [[INCDEC_PTR2_I_I_23]], align 4
; CHECK-NEXT: store float [[TMP70]], ptr [[__LAST_ADDR_0_I_I_23]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_24:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_23]], i32 -4
; CHECK-NEXT: [[INCDEC_PTR2_I_I_24:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_23]], i32 4
; CHECK-NEXT: [[TMP55:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_24]], align 4
; CHECK-NEXT: [[TMP73:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_24]], align 4
; CHECK-NEXT: store float [[TMP73]], ptr [[INCDEC_PTR2_I_I_24]], align 4
; CHECK-NEXT: store float [[TMP55]], ptr [[__LAST_ADDR_0_I_I_24]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_25:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_24]], i32 -4
; CHECK-NEXT: [[INCDEC_PTR2_I_I_25:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_24]], i32 4
; CHECK-NEXT: [[TMP79:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_25]], align 4
; CHECK-NEXT: [[TMP58:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_25]], align 4
; CHECK-NEXT: store float [[TMP58]], ptr [[INCDEC_PTR2_I_I_25]], align 4
; CHECK-NEXT: store float [[TMP79]], ptr [[__LAST_ADDR_0_I_I_25]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_26:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_25]], i32 -4
; CHECK-NEXT: [[INCDEC_PTR2_I_I_26:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_25]], i32 4
; CHECK-NEXT: [[TMP82:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_26]], align 4
; CHECK-NEXT: [[TMP85:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_26]], align 4
; CHECK-NEXT: store float [[TMP85]], ptr [[INCDEC_PTR2_I_I_26]], align 4
; CHECK-NEXT: store float [[TMP82]], ptr [[__LAST_ADDR_0_I_I_26]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_27:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_26]], i32 -4
; CHECK-NEXT: [[INCDEC_PTR2_I_I_27:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_26]], i32 4
; CHECK-NEXT: [[TMP61:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_27]], align 4
; CHECK-NEXT: [[TMP88:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_27]], align 4
; CHECK-NEXT: store float [[TMP88]], ptr [[INCDEC_PTR2_I_I_27]], align 4
; CHECK-NEXT: store float [[TMP61]], ptr [[__LAST_ADDR_0_I_I_27]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_28:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_27]], i32 -4
; CHECK-NEXT: [[INCDEC_PTR2_I_I_28:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_27]], i32 4
; CHECK-NEXT: [[TMP89:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_28]], align 4
; CHECK-NEXT: [[TMP64:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_28]], align 4
; CHECK-NEXT: store float [[TMP64]], ptr [[INCDEC_PTR2_I_I_28]], align 4
; CHECK-NEXT: store float [[TMP89]], ptr [[__LAST_ADDR_0_I_I_28]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_29:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_28]], i32 -4
; CHECK-NEXT: [[INCDEC_PTR2_I_I_29:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_28]], i32 4
; CHECK-NEXT: [[TMP90:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_29]], align 4
; CHECK-NEXT: [[TMP91:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_29]], align 4
; CHECK-NEXT: store float [[TMP91]], ptr [[INCDEC_PTR2_I_I_29]], align 4
; CHECK-NEXT: store float [[TMP90]], ptr [[__LAST_ADDR_0_I_I_29]], align 4
; CHECK-NEXT: [[INCDEC_PTR2_I_I_20:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_29]], i32 -4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_20:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_29]], i32 4
; CHECK-NEXT: [[TMP102:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_20]], align 4
; CHECK-NEXT: store float [[TMP102]], ptr [[INCDEC_PTR2_I_I_20]], align 4
; CHECK-NEXT: [[TMP101:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_20]], align 4
; CHECK-NEXT: store float [[TMP101]], ptr [[__LAST_ADDR_0_I_I_20]], align 4
; CHECK-NEXT: store float [[TMP102]], ptr [[INCDEC_PTR2_I_I_20]], align 4
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_31:%.*]] = getelementptr inbounds i8, ptr [[INCDEC_PTR2_I_I_20]], i32 -4
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x ptr> poison, ptr [[__LAST_ADDR_0_I_I_20]], i32 0
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <2 x ptr> [[TMP52]], ptr [[INCDEC_PTR2_I_I_20]], i32 1
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i8, <2 x ptr> [[TMP53]], <2 x i32> <i32 -4, i32 4>
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_21:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_20]], i32 -4
; CHECK-NEXT: [[TMP103:%.*]] = getelementptr i8, <2 x ptr> [[TMP100]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_21:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_20]], i32 4
; CHECK-NEXT: [[TMP104:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_21]], align 4
; CHECK-NEXT: [[TMP105:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_21]], align 4
; CHECK-NEXT: store float [[TMP105]], ptr [[INCDEC_PTR2_I_I_21]], align 4
; CHECK-NEXT: store float [[TMP104]], ptr [[__LAST_ADDR_0_I_I_21]], align 4
; CHECK-NEXT: [[TMP58:%.*]] = getelementptr i8, <2 x ptr> [[TMP54]], <2 x i32> <i32 -4, i32 4>
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_22:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_21]], i32 -4
; CHECK-NEXT: [[TMP106:%.*]] = getelementptr i8, <2 x ptr> [[TMP103]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_22:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_21]], i32 4
; CHECK-NEXT: [[TMP107:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_22]], align 4
; CHECK-NEXT: [[TMP108:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_22]], align 4
; CHECK-NEXT: store float [[TMP108]], ptr [[INCDEC_PTR2_I_I_22]], align 4
; CHECK-NEXT: store float [[TMP107]], ptr [[__LAST_ADDR_0_I_I_22]], align 4
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i8, <2 x ptr> [[TMP58]], <2 x i32> <i32 -4, i32 4>
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_23:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_22]], i32 -4
; CHECK-NEXT: [[TMP109:%.*]] = getelementptr i8, <2 x ptr> [[TMP106]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_23:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_22]], i32 4
; CHECK-NEXT: [[TMP110:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_23]], align 4
; CHECK-NEXT: [[TMP111:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_23]], align 4
; CHECK-NEXT: store float [[TMP111]], ptr [[INCDEC_PTR2_I_I_23]], align 4
; CHECK-NEXT: store float [[TMP110]], ptr [[__LAST_ADDR_0_I_I_23]], align 4
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr i8, <2 x ptr> [[TMP64]], <2 x i32> <i32 -4, i32 4>
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_24:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_23]], i32 -4
; CHECK-NEXT: [[TMP112:%.*]] = getelementptr i8, <2 x ptr> [[TMP109]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_24:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_23]], i32 4
; CHECK-NEXT: [[TMP113:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_24]], align 4
; CHECK-NEXT: [[TMP114:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_24]], align 4
; CHECK-NEXT: store float [[TMP114]], ptr [[INCDEC_PTR2_I_I_24]], align 4
; CHECK-NEXT: store float [[TMP113]], ptr [[__LAST_ADDR_0_I_I_24]], align 4
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i8, <2 x ptr> [[TMP67]], <2 x i32> <i32 -4, i32 4>
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_25:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_24]], i32 -4
; CHECK-NEXT: [[TMP115:%.*]] = getelementptr i8, <2 x ptr> [[TMP112]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_25:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_24]], i32 4
; CHECK-NEXT: [[TMP116:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_25]], align 4
; CHECK-NEXT: [[TMP117:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_25]], align 4
; CHECK-NEXT: store float [[TMP117]], ptr [[INCDEC_PTR2_I_I_25]], align 4
; CHECK-NEXT: store float [[TMP116]], ptr [[__LAST_ADDR_0_I_I_25]], align 4
; CHECK-NEXT: [[TMP76:%.*]] = getelementptr i8, <2 x ptr> [[TMP70]], <2 x i32> <i32 -4, i32 4>
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_26:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_25]], i32 -4
; CHECK-NEXT: [[TMP118:%.*]] = getelementptr i8, <2 x ptr> [[TMP115]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_26:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_25]], i32 4
; CHECK-NEXT: [[TMP119:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_26]], align 4
; CHECK-NEXT: [[TMP120:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_26]], align 4
; CHECK-NEXT: store float [[TMP120]], ptr [[INCDEC_PTR2_I_I_26]], align 4
; CHECK-NEXT: store float [[TMP119]], ptr [[__LAST_ADDR_0_I_I_26]], align 4
; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i8, <2 x ptr> [[TMP76]], <2 x i32> <i32 -4, i32 4>
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_27:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_26]], i32 -4
; CHECK-NEXT: [[TMP121:%.*]] = getelementptr i8, <2 x ptr> [[TMP118]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_27:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_26]], i32 4
; CHECK-NEXT: [[TMP122:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_27]], align 4
; CHECK-NEXT: [[TMP123:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_27]], align 4
; CHECK-NEXT: store float [[TMP123]], ptr [[INCDEC_PTR2_I_I_27]], align 4
; CHECK-NEXT: store float [[TMP122]], ptr [[__LAST_ADDR_0_I_I_27]], align 4
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr i8, <2 x ptr> [[TMP79]], <2 x i32> <i32 -4, i32 4>
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_28:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_27]], i32 -4
; CHECK-NEXT: [[TMP124:%.*]] = getelementptr i8, <2 x ptr> [[TMP121]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_28:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_27]], i32 4
; CHECK-NEXT: [[TMP125:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_28]], align 4
; CHECK-NEXT: [[TMP126:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_28]], align 4
; CHECK-NEXT: store float [[TMP126]], ptr [[INCDEC_PTR2_I_I_28]], align 4
; CHECK-NEXT: store float [[TMP125]], ptr [[__LAST_ADDR_0_I_I_28]], align 4
; CHECK-NEXT: [[TMP88:%.*]] = getelementptr i8, <2 x ptr> [[TMP82]], <2 x i32> <i32 -4, i32 4>
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_29:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_28]], i32 -4
; CHECK-NEXT: [[TMP127:%.*]] = getelementptr i8, <2 x ptr> [[TMP124]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_29:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_28]], i32 4
; CHECK-NEXT: [[TMP128:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_29]], align 4
; CHECK-NEXT: [[TMP129:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_29]], align 4
; CHECK-NEXT: store float [[TMP129]], ptr [[INCDEC_PTR2_I_I_29]], align 4
; CHECK-NEXT: store float [[TMP128]], ptr [[__LAST_ADDR_0_I_I_29]], align 4
; CHECK-NEXT: [[TMP90:%.*]] = getelementptr i8, <2 x ptr> [[TMP88]], <2 x i32> <i32 -4, i32 4>
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_30:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_29]], i32 -4
; CHECK-NEXT: [[TMP130:%.*]] = getelementptr i8, <2 x ptr> [[TMP127]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_30:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_29]], i32 4
; CHECK-NEXT: [[TMP131:%.*]] = load float, ptr [[INCDEC_PTR2_I_I_30]], align 4
; CHECK-NEXT: [[TMP132:%.*]] = load float, ptr [[__LAST_ADDR_0_I_I_30]], align 4
; CHECK-NEXT: store float [[TMP132]], ptr [[INCDEC_PTR2_I_I_30]], align 4
; CHECK-NEXT: store float [[TMP131]], ptr [[__LAST_ADDR_0_I_I_30]], align 4
; CHECK-NEXT: [[TMP94]] = getelementptr i8, <2 x ptr> [[TMP90]], <2 x i32> <i32 -4, i32 4>
; CHECK-NEXT: [[__LAST_ADDR_0_I_I_31:%.*]] = getelementptr inbounds i8, ptr [[__LAST_ADDR_0_I_I_30]], i32 -4
; CHECK-NEXT: [[TMP133:%.*]] = getelementptr i8, <2 x ptr> [[TMP130]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_31:%.*]] = getelementptr inbounds nuw i8, ptr [[INCDEC_PTR2_I_I_30]], i32 4
; CHECK-NEXT: [[TMP92:%.*]] = getelementptr i8, <2 x ptr> [[TMP53]], <2 x i32> <i32 4, i32 -4>
; CHECK-NEXT: [[INCDEC_PTR2_I_I_31:%.*]] = getelementptr inbounds nuw i8, ptr [[__LAST_ADDR_0_I_I_20]], i32 4
; CHECK-NEXT: [[CMP1_I_I_31:%.*]] = icmp ult ptr [[INCDEC_PTR2_I_I_31]], [[__LAST_ADDR_0_I_I_31]]
; CHECK-NEXT: [[TMP76]] = shufflevector <2 x ptr> [[TMP92]], <2 x ptr> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: br i1 [[CMP1_I_I_31]], label %[[WHILE_BODY_I_I]], label %[[INVOKE_CONT21_EXITSTUB:.*]]
; CHECK: [[INVOKE_CONT21_EXITSTUB]]:
; CHECK-NEXT: ret void

View File

@ -5,12 +5,19 @@ define void @test() {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i64(ptr align 2 null, i64 6, <4 x i1> splat (i1 true), i32 4)
; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr getelementptr inbounds (i8, ptr null, i64 6), align 2
; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr getelementptr (i8, ptr null, i64 12), align 2
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr null, align 2
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP12]], i16 [[TMP7]], i32 2
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP10]], i32 3
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[TMP13]], <4 x i16> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i16> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr getelementptr (i8, ptr null, i64 18), align 2
; CHECK-NEXT: [[TMP9:%.*]] = xor i16 [[TMP8]], 0
; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP4]], i16 [[TMP3]])
; CHECK-NEXT: [[TMP11:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP3]], i16 [[TMP9]])
; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP11]], i16 0)
; CHECK-NEXT: [[TMP6:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP5]], i16 0)
; CHECK-NEXT: ret void
;

View File

@ -4,19 +4,17 @@
define float @test(ptr %call78) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> <ptr poison, ptr null>, ptr [[CALL78:%.*]], i32 0
; CHECK-NEXT: br label [[FOR_BODY194:%.*]]
; CHECK: for.body194:
; CHECK-NEXT: [[INDVARS_IV132:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[FOR_BODY194]] ]
; CHECK-NEXT: [[CURRENTW_031:%.*]] = phi ptr [ [[CALL78]], [[ENTRY]] ], [ [[PREVIOUSW_030:%.*]], [[FOR_BODY194]] ]
; CHECK-NEXT: [[CURRENTW_031:%.*]] = phi ptr [ [[CALL78:%.*]], [[ENTRY]] ], [ [[PREVIOUSW_030:%.*]], [[FOR_BODY194]] ]
; CHECK-NEXT: [[PREVIOUSW_030]] = phi ptr [ null, [[ENTRY]] ], [ [[CURRENTW_031]], [[FOR_BODY194]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP0]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY194]] ]
; CHECK-NEXT: [[TMP3]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: store float 0.000000e+00, ptr [[CURRENTW_031]], align 4
; CHECK-NEXT: tail call void null(ptr [[PREVIOUSW_030]], ptr null, ptr null, i32 0, i32 0, ptr null, ptr null, i32 0)
; CHECK-NEXT: br i1 false, label [[FOR_END286_LOOPEXIT:%.*]], label [[FOR_BODY194]]
; CHECK: for.end286.loopexit:
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x ptr> [ [[TMP3]], [[FOR_BODY194]] ]
; CHECK-NEXT: [[CURRENTW_031_LCSSA:%.*]] = phi ptr [ [[CURRENTW_031]], [[FOR_BODY194]] ]
; CHECK-NEXT: [[PREVIOUSW_030_LCSSA:%.*]] = phi ptr [ [[PREVIOUSW_030]], [[FOR_BODY194]] ]
; CHECK-NEXT: ret float 0.000000e+00
;
entry:

View File

@ -31,6 +31,6 @@ define void @fun(ptr nocapture, i32 zeroext) local_unnamed_addr #0 {
._crit_edge: ; preds = %.lr.ph
ret void
; CHECK: SLP: Adding cost -1 for bundle Idx: 4, n=2 [ %4 = icmp ult i32 %2, %1, ..]
; CHECK: SLP: Adding cost -2 for bundle Idx: 4, n=2 [ %4 = icmp ult i32 %2, %1, ..]
}

View File

@ -73,7 +73,7 @@ define void @fun1(double %0) {
; REMARK-LABEL: Function: fun1
; REMARK: Args:
; REMARK: - String: 'SLP vectorized with cost '
; REMARK-NEXT: - Cost: '-1'
; REMARK-NEXT: - Cost: '-2'
br label %2

View File

@ -283,7 +283,7 @@ return: ; preds = %entry, %if.end
define void @PR19646(ptr %this, i1 %arg) {
; CHECK-LABEL: @PR19646(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 %arg, label [[IF_END13:%.*]], label [[IF_END13]]
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[IF_END13:%.*]], label [[IF_END13]]
; CHECK: sw.epilog7:
; CHECK-NEXT: [[DOTIN:%.*]] = getelementptr inbounds [[CLASS_B_53_55:%.*]], ptr [[THIS:%.*]], i64 0, i32 0, i32 1
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[DOTIN]], align 8
@ -294,7 +294,7 @@ define void @PR19646(ptr %this, i1 %arg) {
; CHECK-NEXT: [[_DY:%.*]] = getelementptr inbounds [[CLASS_B_53_55]], ptr [[THIS]], i64 0, i32 0, i32 2
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[_DY]], align 8
; CHECK-NEXT: [[ADD10:%.*]] = fadd double [[ADD8]], [[TMP2]]
; CHECK-NEXT: br i1 %arg, label [[IF_THEN12:%.*]], label [[IF_END13]]
; CHECK-NEXT: br i1 [[ARG]], label [[IF_THEN12:%.*]], label [[IF_END13]]
; CHECK: if.then12:
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8
; CHECK-NEXT: br label [[IF_END13]]

View File

@ -14,16 +14,16 @@ define void @test(i32 %arg, i32 %arg1, i64 %arg2) {
; CHECK-NEXT: [[SHL17:%.*]] = shl i32 [[SHL]], 0
; CHECK-NEXT: [[ADD18:%.*]] = add i64 1, 0
; CHECK-NEXT: [[TRUNC19:%.*]] = trunc i64 [[ADD18]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> <i64 poison, i64 0, i64 poison, i64 0>, i64 [[PHI]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i64> zeroinitializer, [[TMP6]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP3]] to i32
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
; CHECK-NEXT: [[TRUNC10:%.*]] = trunc i64 [[TMP10]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> <i64 poison, i64 0, i64 poison, i64 0>, i64 [[PHI]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i64> zeroinitializer, [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP8]] to i32
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
; CHECK-NEXT: [[TRUNC10:%.*]] = trunc i64 [[TMP6]] to i32
; CHECK-NEXT: [[OR11:%.*]] = or i32 [[TRUNC10]], 0
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i64> zeroinitializer, [[TMP6]]
; CHECK-NEXT: [[TMP13:%.*]] = mul <4 x i64> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i64> zeroinitializer, [[TMP3]]
; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i64> [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[XOR:%.*]] = xor i32 0, [[TRUNC]]
; CHECK-NEXT: [[SEXT15:%.*]] = sext i32 [[XOR]] to i64
; CHECK-NEXT: [[SEXT22:%.*]] = sext i32 [[SHL17]] to i64
@ -37,8 +37,8 @@ define void @test(i32 %arg, i32 %arg1, i64 %arg2) {
; CHECK-NEXT: [[SEXT35:%.*]] = sext i32 [[SHL34]] to i64
; CHECK-NEXT: [[XOR37:%.*]] = xor i32 [[ARG]], [[TRUNC27]]
; CHECK-NEXT: [[SEXT38:%.*]] = sext i32 [[XOR37]] to i64
; CHECK-NEXT: [[TMP31:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP13]])
; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP31]], [[SEXT]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP9]])
; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP7]], [[SEXT]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = add i64 [[SEXT15]], [[SEXT22]]
; CHECK-NEXT: [[OP_RDX2:%.*]] = add i64 [[SEXT25]], [[SEXT29]]
; CHECK-NEXT: [[OP_RDX3:%.*]] = add i64 [[SEXT32]], [[SEXT35]]

View File

@ -5,37 +5,35 @@ define void @test(ptr %0, ptr %1, i1 %cond, double %2) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], i1 [[COND:%.*]], double [[TMP2:%.*]]) {
; CHECK-NEXT: [[ITER_CHECK:.*]]:
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br [[DOTLR_PH383_US_US_US_US:label %.*]]
; CHECK: [[_LR_PH383_US_US_US_US:.*:]]
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x double> [ [[TMP22:%.*]], %[[DOT_CRIT_EDGE384_US_US_US_US:.*]] ], [ zeroinitializer, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x double> [ [[TMP18:%.*]], %[[DOT_CRIT_EDGE384_US_US_US_US:.*]] ], [ zeroinitializer, %[[ITER_CHECK]] ]
; CHECK-NEXT: br i1 false, label %[[DOTLR_PH383_US_US_US_US___CRIT_EDGE384_US_US_US_US_CRIT_EDGE:.*]], label %[[BB6:.*]]
; CHECK: [[_LR_PH383_US_US_US_US___CRIT_EDGE384_US_US_US_US_CRIT_EDGE:.*:]]
; CHECK-NEXT: br label %[[DOT_CRIT_EDGE384_US_US_US_US]]
; CHECK: [[BB6]]:
; CHECK-NEXT: br i1 [[COND]], label %[[BB16:.*]], label %[[BB7:.*]]
; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP0]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], <double 1.000000e+00, double 0.000000e+00>
; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = fadd double [[TMP2]], [[TMP8]]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP13]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = fmul double [[TMP9]], 0.000000e+00
; CHECK-NEXT: [[TMP12:%.*]] = fadd double [[TMP11]], 0.000000e+00
; CHECK-NEXT: [[TMP16:%.*]] = fadd double [[TMP9]], 0.000000e+00
; CHECK-NEXT: [[TMP17:%.*]] = fadd double [[TMP2]], [[TMP10]]
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[DOT0304_US_US_US_US:%.*]] = phi double [ [[TMP14]], %[[BB7]] ], [ 0.000000e+00, %[[BB6]] ]
; CHECK-NEXT: [[DOT0301_US_US_US_US:%.*]] = phi double [ [[TMP15]], %[[BB7]] ], [ 0.000000e+00, %[[BB6]] ]
; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x double> [ [[TMP13]], %[[BB7]] ], [ [[TMP4]], %[[BB6]] ]
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x double> [[TMP18]], double [[DOT0301_US_US_US_US]], i32 1
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x double> [[TMP19]], double [[DOT0304_US_US_US_US]], i32 2
; CHECK-NEXT: [[DOT0311_US_US_US_US:%.*]] = phi double [ [[TMP12]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
; CHECK-NEXT: [[DOT0304_US_US_US_US:%.*]] = phi double [ [[TMP17]], %[[BB7]] ], [ 0.000000e+00, %[[BB6]] ]
; CHECK-NEXT: [[DOT0301_US_US_US_US:%.*]] = phi double [ [[TMP16]], %[[BB7]] ], [ 0.000000e+00, %[[BB6]] ]
; CHECK-NEXT: [[DOT0257_US_US_US_US:%.*]] = phi double [ [[TMP16]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> poison, double [[DOT0257_US_US_US_US]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x double> [[TMP13]], double [[DOT0301_US_US_US_US]], i32 1
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x double> [[TMP14]], double [[DOT0304_US_US_US_US]], i32 2
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x double> [[TMP15]], double [[DOT0311_US_US_US_US]], i32 3
; CHECK-NEXT: [[TMP21:%.*]] = fadd <4 x double> [[TMP20]], [[TMP5]]
; CHECK-NEXT: br label %[[DOT_CRIT_EDGE384_US_US_US_US]]
; CHECK: [[__CRIT_EDGE384_US_US_US_US:.*:]]
; CHECK-NEXT: [[TMP22]] = phi <4 x double> [ [[TMP21]], %[[BB16]] ], [ zeroinitializer, %[[DOTLR_PH383_US_US_US_US___CRIT_EDGE384_US_US_US_US_CRIT_EDGE]] ]
; CHECK-NEXT: [[TMP18]] = phi <4 x double> [ [[TMP21]], %[[BB16]] ], [ zeroinitializer, %[[DOTLR_PH383_US_US_US_US___CRIT_EDGE384_US_US_US_US_CRIT_EDGE]] ]
; CHECK-NEXT: br [[DOTLR_PH383_US_US_US_US]]
;
iter.check:

View File

@ -8,7 +8,7 @@
; YAML-NEXT: Function: test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - Cost: '-38'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '7'
; YAML-NEXT: ...

View File

@ -12,25 +12,23 @@ define void @test(ptr %0, ptr %1, double %2) {
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP12:%.*]] = fadd double [[TMP2]], 0.000000e+00
; CHECK-NEXT: [[TMP9:%.*]] = fadd double [[TMP5]], 0.000000e+00
; CHECK-NEXT: [[TMP8:%.*]] = fadd double [[TMP2]], 0.000000e+00
; CHECK-NEXT: [[TMP6:%.*]] = fmul double [[TMP5]], 0.000000e+00
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[TMP7]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[TMP6]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP10:%.*]] = fadd double [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[TMP0]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> [[TMP14]], double [[TMP13]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[TMP12]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> [[TMP12]], double [[TMP13]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP14]], double [[TMP8]], i32 1
; CHECK-NEXT: [[TMP18:%.*]] = fadd <2 x double> [[TMP15]], [[TMP17]]
; CHECK-NEXT: br label %[[BB19:.*]]
; CHECK: [[BB19]]:
; CHECK-NEXT: br label %[[BB20:.*]]
; CHECK: [[BB20]]:
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x double> [[TMP18]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x double> [[TMP21]], <4 x double> [[TMP22]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x double> [[TMP21]], double [[TMP9]], i32 2
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x double> [[TMP20]], double [[TMP10]], i32 3
; CHECK-NEXT: [[TMP24:%.*]] = fmul <4 x double> [[TMP23]], <double 0.000000e+00, double 1.000000e+00, double 1.000000e+00, double 0.000000e+00>
; CHECK-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[TMP24]], [[TMP3]]
; CHECK-NEXT: br label %[[DOT_CRIT_EDGE384_US_US_US_US]]

View File

@ -12,11 +12,12 @@ define void @test() {
; CHECK: loop:
; CHECK-NEXT: [[PHI1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX25:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i64> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[LOOP]] ]
; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i64> [[TMP6]], splat (i64 4)
; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP1]], splat (i64 2)
; CHECK-NEXT: [[RDX_OP:%.*]] = add <8 x i64> [[TMP7]], [[TMP5]]
; CHECK-NEXT: [[OP_RDX16:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[RDX_OP]])
; CHECK-NEXT: [[OP_RDX25]] = add i64 [[OP_RDX16]], [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP6]])
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP8]], 2
; CHECK-NEXT: [[OP_RDX17:%.*]] = add i64 [[TMP7]], [[TMP3]]
; CHECK-NEXT: [[OP_RDX25]] = add i64 [[OP_RDX17]], [[TMP5]]
; CHECK-NEXT: br label [[LOOP]]
;
entry:

View File

@ -12,36 +12,37 @@ define dso_local i32 @g() local_unnamed_addr {
; CHECK-NEXT: br i1 [[TOBOOL_NOT19]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[A_020:%.*]] = phi ptr [ [[A_020_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64
; CHECK-NEXT: [[B_021:%.*]] = phi ptr [ [[B_021_BE:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ]
; CHECK-NEXT: [[A_20:%.*]] = phi ptr [ [[A_020_BE1:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ]
; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[A_020]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 1)
; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[A_20]], i64 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B_021]], i64 1
; CHECK-NEXT: switch i32 [[TMP4]], label [[WHILE_BODY_BACKEDGE]] [
; CHECK-NEXT: i32 2, label [[SW_BB:%.*]]
; CHECK-NEXT: i32 4, label [[SW_BB6:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb:
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[B_021]], i64 2
; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 2
; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[A_20]], i64 2
; CHECK-NEXT: store i32 [[TMP8]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 2)
; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 2
; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]]
; CHECK: sw.bb6:
; CHECK-NEXT: [[INCDEC_PTR8:%.*]] = getelementptr inbounds i32, ptr [[A_20]], i64 2
; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 2
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[INCDEC_PTR]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 2)
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP13]], align 4
; CHECK-NEXT: [[INCDEC_PTR9:%.*]] = getelementptr inbounds i32, ptr [[B_021]], i64 2
; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP6]], align 4
; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]]
; CHECK: while.body.backedge:
; CHECK-NEXT: [[A_020_BE]] = phi ptr [ [[INCDEC_PTR1]], [[WHILE_BODY]] ], [ [[INCDEC_PTR7]], [[SW_BB6]] ], [ [[INCDEC_PTR4]], [[SW_BB]] ]
; CHECK-NEXT: [[TMP14]] = phi <2 x ptr> [ [[TMP5]], [[WHILE_BODY]] ], [ [[TMP12]], [[SW_BB6]] ], [ [[TMP9]], [[SW_BB]] ]
; CHECK-NEXT: [[A_020_BE]] = phi ptr [ [[INCDEC_PTR]], [[WHILE_BODY]] ], [ [[INCDEC_PTR7]], [[SW_BB6]] ], [ [[INCDEC_PTR5]], [[SW_BB]] ]
; CHECK-NEXT: [[B_021_BE]] = phi ptr [ [[TMP6]], [[WHILE_BODY]] ], [ [[INCDEC_PTR9]], [[SW_BB6]] ], [ [[INCDEC_PTR3]], [[SW_BB]] ]
; CHECK-NEXT: [[A_020_BE1]] = phi ptr [ [[INCDEC_PTR1]], [[WHILE_BODY]] ], [ [[INCDEC_PTR8]], [[SW_BB6]] ], [ [[INCDEC_PTR4]], [[SW_BB]] ]
; CHECK-NEXT: br label [[WHILE_BODY]]
; CHECK: while.end:
; CHECK-NEXT: ret i32 undef

View File

@ -12,22 +12,10 @@ define void @test() #0 {
; CHECK: bb1:
; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ 1, [[BB1]] ], [ 2, [[BB:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP18:%.*]], [[BB1]] ], [ 3, [[BB]] ]
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 4, [[TMP]]
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], [[TMP]]
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], [[TMP]]
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP]]
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], [[TMP]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP]]
; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP]]
; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP]]
; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], [[TMP]]
; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP]]
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP]]
; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP]]
; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], [[TMP]]
; CHECK-NEXT: [[TMP18]] = mul i32 [[TMP17]], [[TMP]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[TMP]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP18]] = mul i32 [[TMP2]], 4
; CHECK-NEXT: br label [[BB1]]
;
bb:

View File

@ -8,7 +8,7 @@
; YAML: Function: test
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
; YAML: - Cost: '-3'
; YAML: - Cost: '-9'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '16'
; YAML: ...

View File

@ -9,8 +9,7 @@ define void @test() {
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i64> [[TMP7]] to <16 x i1>
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = freeze <16 x i1> [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i1> [[TMP10]] to <16 x i16>
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i16> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <16 x i1> [[TMP10]], zeroinitializer
; CHECK-NEXT: ret void
;
%xor108.i.i.i = xor i64 0, 1

View File

@ -5,58 +5,64 @@ define i32 @main(ptr %c, i32 %0, i1 %tobool4.not, i16 %1) {
; CHECK-LABEL: define i32 @main(
; CHECK-SAME: ptr [[C:%.*]], i32 [[TMP0:%.*]], i1 [[TOBOOL4_NOT:%.*]], i16 [[TMP1:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 1, i32 0, i32 poison>, i32 [[TMP0]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 1, i32 poison, i32 1, i32 1>, i32 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP0]], i32 1
; CHECK-NEXT: br label %[[IF_END:.*]]
; CHECK: [[IF_END]]:
; CHECK-NEXT: [[B_0_PH:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP33:%.*]], %[[WHILE_COND_PREHEADER:.*]] ]
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP32:%.*]], %[[WHILE_COND_PREHEADER]] ]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B_0_PH]], i32 0
; CHECK-NEXT: [[B_0_PH:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP32:%.*]], %[[WHILE_COND_PREHEADER:.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP33:%.*]], %[[WHILE_COND_PREHEADER]] ]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[B_0_PH]], i32 0
; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label %[[R:.*]], label %[[IF_END9:.*]]
; CHECK: [[IF_END9]]:
; CHECK-NEXT: [[CONV11:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 1, i32 0>, i32 [[CONV11]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[CONV11]], i32 0
; CHECK-NEXT: br label %[[R]]
; CHECK: [[R]]:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ [[TMP9]], %[[IF_END9]] ], [ [[TMP6]], %[[IF_END]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ <i32 1, i32 0>, %[[IF_END9]] ], [ [[TMP2]], %[[IF_END]] ]
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ [[TMP5]], %[[IF_END9]] ], [ [[TMP4]], %[[IF_END]] ]
; CHECK-NEXT: [[TOBOOL12_NOT:%.*]] = icmp eq i32 [[B_0_PH]], 0
; CHECK-NEXT: br i1 [[TOBOOL12_NOT]], label %[[IF_END14:.*]], label %[[IF_THEN13:.*]]
; CHECK: [[IF_THEN13]]:
; CHECK-NEXT: br label %[[IF_END14]]
; CHECK: [[IF_END14]]:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP8]], 1
; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[AND]], 1
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
; CHECK-NEXT: [[AND17:%.*]] = and i32 [[TMP9]], 1
; CHECK-NEXT: [[DIV20:%.*]] = sdiv i32 [[AND17]], [[TMP0]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[C]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP11]], <4 x i32> <i32 4, i32 poison, i32 2, i32 5>
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i32> [[TMP12]], <i32 1, i32 -1, i32 1, i32 1>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> <i32 -1, i32 poison, i32 -1, i32 -1>, <4 x i32> [[TMP11]], <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; CHECK-NEXT: [[TMP24:%.*]] = and <4 x i32> splat (i32 1), [[TMP17]]
; CHECK-NEXT: [[TMP15:%.*]] = sdiv <4 x i32> [[TMP24]], [[TMP4]]
; CHECK-NEXT: [[AND25:%.*]] = and i32 [[TMP0]], 1
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[AND17]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP10]], i32 2
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[AND25]], i32 3
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> <i32 0, i32 1, i32 poison, i32 1>, i32 [[DIV20]], i32 2
; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP14]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP16]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x i32> <i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison>, i32 [[B_0_PH]], i32 0
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x i32> [[TMP21]], i32 [[TMP0]], i32 7
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> poison, i32 [[NOT]], i32 2
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP6]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 5>
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[TMP13]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i32> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[B_0_PH]], i32 0
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP0]], i32 2
; CHECK-NEXT: br label %[[AH:.*]]
; CHECK: [[AH]]:
; CHECK-NEXT: [[TMP23:%.*]] = phi <8 x i32> [ [[TMP22]], %[[AH]] ], [ [[TMP20]], %[[IF_END14]] ]
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP23]], i32 5
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP23]], i32 7
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i32> [[TMP23]], <8 x i32> poison, <2 x i32> <i32 5, i32 7>
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP21]], %[[AH]] ], [ [[TMP16]], %[[IF_END14]] ]
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP22]], %[[AH]] ], [ [[TMP20]], %[[IF_END14]] ]
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP23]], i32 2
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> [[TMP24]], <2 x i32> <i32 2, i32 6>
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP25]], [[TMP26]]
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP23]], i32 4
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP23]], i32 1
; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[ADD]], [[TMP28]]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i32> [[TMP23]], i32 6
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP23]], i32 3
; CHECK-NEXT: [[OR27:%.*]] = or i32 [[TMP29]], [[TMP30]]
; CHECK-NEXT: store i32 [[OR27]], ptr [[C]], align 4
; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label %[[WHILE_COND_PREHEADER]], label %[[AH]]
; CHECK: [[WHILE_COND_PREHEADER]]:
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <8 x i32> [[TMP23]], i32 1
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3
; CHECK-NEXT: [[CALL69:%.*]] = tail call i32 @s(i32 [[TMP31]])
; CHECK-NEXT: [[TMP33]] = extractelement <8 x i32> [[TMP23]], i32 0
; CHECK-NEXT: [[TMP32]] = shufflevector <8 x i32> [[TMP23]], <8 x i32> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP32]] = extractelement <4 x i32> [[TMP23]], i32 0
; CHECK-NEXT: [[TMP33]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: br label %[[IF_END]]
;
entry:

View File

@ -136,30 +136,31 @@ for.end: ; preds = %for.body
define float @foo3(ptr nocapture readonly %A) #0 {
; CHECK-LABEL: @foo3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP2]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = phi float [ [[TMP4]], [[ENTRY]] ], [ [[TMP21:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP2]], [[ENTRY]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP1]], [[ENTRY]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 7.000000e+00
; CHECK-NEXT: [[ADD6]] = fadd float [[R_052]], [[MUL]]
; CHECK-NEXT: [[TMP6:%.*]] = add nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP21]] = load float, ptr [[ARRAYIDX24]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX14]], align 4
; CHECK-NEXT: [[TMP7]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4
; CHECK-NEXT: [[TMP10]] = load float, ptr [[ARRAYIDX19]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> <i32 5, i32 1, i32 2, i32 poison>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP21]], i32 3
; CHECK-NEXT: [[TMP14:%.*]] = fmul <4 x float> [[TMP13]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>
; CHECK-NEXT: [[TMP15]] = fadd <4 x float> [[TMP3]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32

View File

@ -8,33 +8,33 @@ define void @test(i32 %arg) {
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP5:%.*]], %[[BB1]] ]
; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[ADD24:%.*]], %[[BB1]] ]
; CHECK-NEXT: [[PHI3:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[XOR26:%.*]], %[[BB1]] ]
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[ADD]], [[ADD4]]
; CHECK-NEXT: [[ADD23:%.*]] = add i32 [[PHI]], 0
; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[ADD7:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[XOR8:%.*]] = xor i32 [[ADD6]], [[XOR]]
; CHECK-NEXT: [[XOR9:%.*]] = xor i32 [[XOR8]], [[ADD23]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = xor i32 [[XOR9]], [[ADD7]]
; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[PHI]], 0
; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[ADD26:%.*]], %[[BB1]] ]
; CHECK-NEXT: [[PHI3:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[XOR27:%.*]], %[[BB1]] ]
; CHECK-NEXT: [[ADD13:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[OP_RDX3:%.*]] = xor i32 [[OP_RDX]], [[OP_RDX1]]
; CHECK-NEXT: [[OP_RDX4:%.*]] = xor i32 [[OP_RDX3]], [[OP_RDX2]]
; CHECK-NEXT: [[XOR16:%.*]] = xor i32 [[OP_RDX4]], [[ADD13]]
; CHECK-NEXT: [[ADD17:%.*]] = add i32 [[PHI]], 0
; CHECK-NEXT: [[ADD18:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[ADD13]], [[ADD18]]
; CHECK-NEXT: [[ADD5:%.*]] = add i32 [[PHI]], 0
; CHECK-NEXT: [[ADD19:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[XOR20:%.*]] = xor i32 [[ADD18]], [[XOR16]]
; CHECK-NEXT: [[XOR21:%.*]] = xor i32 [[XOR20]], [[ADD17]]
; CHECK-NEXT: [[XOR22:%.*]] = xor i32 [[XOR21]], [[ADD19]]
; CHECK-NEXT: [[ADD7:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[XOR8:%.*]] = xor i32 [[ADD19]], [[XOR]]
; CHECK-NEXT: [[XOR9:%.*]] = xor i32 [[XOR8]], [[ADD5]]
; CHECK-NEXT: [[XOR22:%.*]] = xor i32 [[XOR9]], [[ADD7]]
; CHECK-NEXT: [[ADD24:%.*]] = add i32 [[PHI]], 0
; CHECK-NEXT: [[ADD25:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[ADD24]] = add i32 [[ARG]], 0
; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[XOR25:%.*]] = xor i32 [[ADD25]], [[XOR22]]
; CHECK-NEXT: [[XOR26]] = xor i32 [[XOR25]], [[ADD24]]
; CHECK-NEXT: [[XOR26:%.*]] = xor i32 [[XOR25]], [[ADD24]]
; CHECK-NEXT: [[XOR16:%.*]] = xor i32 [[XOR26]], [[ADD14]]
; CHECK-NEXT: [[ADD17:%.*]] = add i32 [[PHI]], 0
; CHECK-NEXT: [[ADD20:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[ADD21:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[XOR20:%.*]] = xor i32 [[ADD20]], [[XOR16]]
; CHECK-NEXT: [[XOR21:%.*]] = xor i32 [[XOR20]], [[ADD17]]
; CHECK-NEXT: [[XOR23:%.*]] = xor i32 [[XOR21]], [[ADD21]]
; CHECK-NEXT: [[ADD23:%.*]] = add i32 [[PHI2]], 0
; CHECK-NEXT: [[ADD26]] = add i32 [[ARG]], 0
; CHECK-NEXT: [[XOR28:%.*]] = xor i32 [[ADD23]], [[XOR23]]
; CHECK-NEXT: [[XOR27]] = xor i32 [[XOR28]], [[ADD26]]
; CHECK-NEXT: [[TMP5]] = add i32 1, 0
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i32 [[TMP5]], 0
; CHECK-NEXT: br label %[[BB1]]

View File

@ -8,19 +8,15 @@ define i32 @test(i1 %cond) {
; CHECK-NEXT: [[OR92:%.*]] = or i32 1, 0
; CHECK-NEXT: br label %[[BB:.*]]
; CHECK: [[BB]]:
; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OR92]], %[[BB]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], %[[BB]] ], [ zeroinitializer, %[[ENTRY]] ]
; CHECK-NEXT: [[P3:%.*]] = phi i32 [ [[OP_RDX:%.*]], %[[BB]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ splat (i32 1), %[[BB]] ], [ zeroinitializer, %[[ENTRY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>, <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[P1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>, <4 x i32> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> zeroinitializer, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8]] = xor <2 x i32> [[TMP9]], <i32 1, i32 0>
; CHECK-NEXT: [[OP_RDX]] = xor i32 [[TMP6]], [[OR92]]
; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[BB]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[OP_RDX:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0
; CHECK-NEXT: ret i32 [[OP_RDX]]
;
entry:

View File

@ -86,7 +86,7 @@ for.body: ; preds = %for.body, %entry
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
; YAML-NEXT: - Cost: '-5'
; YAML-NEXT: - Cost: '-40'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '4'
@ -96,7 +96,7 @@ for.body: ; preds = %for.body, %entry
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
; YAML-NEXT: - Cost: '-7'
; YAML-NEXT: - Cost: '-56'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '1'

View File

@ -62,7 +62,7 @@ for.body: ; preds = %for.body, %entry
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
; YAML-NEXT: - Cost: '-1'
; YAML-NEXT: - Cost: '-8'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '4'

View File

@ -11,8 +11,7 @@ define void @test() {
; CHECK: 1:
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> zeroinitializer, [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i1> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i8>
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i1> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP6]] = and <2 x i1> [[TMP5]], zeroinitializer
; CHECK-NEXT: br label [[FOR_BODY]]
;

View File

@ -18,13 +18,10 @@ define void @foo(i1 %arg) {
; CHECK: bb4:
; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double>
; CHECK-NEXT: [[CONV2:%.*]] = uitofp i16 0 to double
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[TMP3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[CONV2]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> <double poison, double poison, double 0.000000e+00, double 0.000000e+00>, <4 x double> [[TMP15]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[TMP3]], [[CONV2]]
; CHECK-NEXT: [[SUB1:%.*]] = fsub double 0.000000e+00, 0.000000e+00
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> <double poison, double poison, double 0.000000e+00, double 0.000000e+00>, double [[SUB1]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x double> [[TMP5]], double [[ADD1]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]]
; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float>
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]]

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-apple-macosx10.9.0 | FileCheck %s --check-prefixes=CHECK-X86 %}
; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK-AARCH64 %}
; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-apple-macosx10.9.0 | FileCheck %s %}
; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu | FileCheck %s %}
@A = common global [2000 x double] zeroinitializer, align 16
@B = common global [2000 x double] zeroinitializer, align 16
@ -181,7 +181,7 @@ define i32 @foo_loop(ptr %A, i32 %n) {
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> <double 7.000000e+00, double 7.000000e+00>, [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> splat (double 7.000000e+00), [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
; CHECK-NEXT: [[ADD6:%.*]] = fadd double [[TMP4]], [[TMP5]]
@ -355,7 +355,7 @@ define i32 @foo_loop_non_power_of_2(ptr %A, i32 %n) {
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_5]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> <double 7.000000e+00, double 7.000000e+00>, [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> splat (double 7.000000e+00), [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
; CHECK-NEXT: [[ADD6:%.*]] = fadd double [[TMP4]], [[TMP5]]
@ -439,55 +439,27 @@ for.end: ; preds = %for.cond.for.end_cr
; Make sure we are able to vectorize this from now on:
;
define double @bar(ptr nocapture readonly %a, i32 %n) local_unnamed_addr {
; CHECK-X86-LABEL: @bar(
; CHECK-X86-NEXT: entry:
; CHECK-X86-NEXT: [[CMP15:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-X86-NEXT: br i1 [[CMP15]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
; CHECK-X86: for.cond.cleanup:
; CHECK-X86-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
; CHECK-X86-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[TMP0]], i32 0
; CHECK-X86-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
; CHECK-X86-NEXT: [[MUL:%.*]] = fmul double [[TMP1]], [[TMP2]]
; CHECK-X86-NEXT: ret double [[MUL]]
; CHECK-X86: for.body:
; CHECK-X86-NEXT: [[I_018:%.*]] = phi i32 [ [[ADD5:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-X86-NEXT: [[TMP3:%.*]] = phi <2 x double> [ [[TMP5]], [[FOR_BODY]] ], [ zeroinitializer, [[ENTRY]] ]
; CHECK-X86-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_018]] to i64
; CHECK-X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[IDXPROM]]
; CHECK-X86-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
; CHECK-X86-NEXT: [[TMP5]] = fadd <2 x double> [[TMP3]], [[TMP4]]
; CHECK-X86-NEXT: [[ADD5]] = add i32 [[I_018]], 2
; CHECK-X86-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD5]], [[N]]
; CHECK-X86-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP15:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP15]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret double [[MUL]]
; CHECK: for.body:
; CHECK-NEXT: [[I_018:%.*]] = phi i32 [ [[ADD5:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x double> [ [[TMP5]], [[FOR_BODY]] ], [ zeroinitializer, [[ENTRY]] ]
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_018]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[TMP5]] = fadd <2 x double> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[ADD5]] = add i32 [[I_018]], 2
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD5]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
; CHECK-AARCH64-LABEL: @bar(
; CHECK-AARCH64-NEXT: entry:
; CHECK-AARCH64-NEXT: [[CMP15:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-AARCH64-NEXT: br i1 [[CMP15]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
; CHECK-AARCH64: for.cond.cleanup:
; CHECK-AARCH64-NEXT: [[X_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-AARCH64-NEXT: [[Y_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD4:%.*]], [[FOR_BODY]] ]
; CHECK-AARCH64-NEXT: [[MUL:%.*]] = fmul double [[X_0_LCSSA]], [[Y_0_LCSSA]]
; CHECK-AARCH64-NEXT: ret double [[MUL]]
; CHECK-AARCH64: for.body:
; CHECK-AARCH64-NEXT: [[I_018:%.*]] = phi i32 [ [[ADD5:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-AARCH64-NEXT: [[Y_017:%.*]] = phi double [ [[ADD4]], [[FOR_BODY]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-AARCH64-NEXT: [[X_016:%.*]] = phi double [ [[ADD]], [[FOR_BODY]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-AARCH64-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_018]] to i64
; CHECK-AARCH64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[IDXPROM]]
; CHECK-AARCH64-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-AARCH64-NEXT: [[ADD]] = fadd double [[X_016]], [[TMP0]]
; CHECK-AARCH64-NEXT: [[ADD1:%.*]] = or disjoint i32 [[I_018]], 1
; CHECK-AARCH64-NEXT: [[IDXPROM2:%.*]] = zext i32 [[ADD1]] to i64
; CHECK-AARCH64-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IDXPROM2]]
; CHECK-AARCH64-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
; CHECK-AARCH64-NEXT: [[ADD4]] = fadd double [[Y_017]], [[TMP1]]
; CHECK-AARCH64-NEXT: [[ADD5]] = add i32 [[I_018]], 2
; CHECK-AARCH64-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD5]], [[N]]
; CHECK-AARCH64-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
entry:
%cmp15 = icmp eq i32 %n, 0
br i1 %cmp15, label %for.cond.cleanup, label %for.body

View File

@ -7,8 +7,7 @@ define i32 @test(i1 %.b, i8 %conv18, i32 %k.promoted61) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[DOTB]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true)
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i8>
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i1> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i8>
; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i8> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i8> poison, i8 [[CONV18]], i32 0

View File

@ -1,26 +1,67 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %}
; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %}
; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=X86 %}
; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck --check-prefix=AARCH64 %s %}
define i16 @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 5
; CHECK-NEXT: [[A1:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 6
; CHECK-NEXT: br label [[WHILE:%.*]]
; CHECK: while:
; CHECK-NEXT: [[PH:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX26:%.*]], [[WHILE]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr null, align 8
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A1]], align 16
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr null, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> [[TMP4]])
; CHECK-NEXT: [[OP_RDX:%.*]] = xor i64 0, [[TMP2]]
; CHECK-NEXT: [[OP_RDX24:%.*]] = xor i64 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[OP_RDX25:%.*]] = xor i64 [[OP_RDX]], [[OP_RDX24]]
; CHECK-NEXT: [[OP_RDX26]] = xor i64 [[OP_RDX25]], [[TMP5]]
; CHECK-NEXT: br label [[WHILE]]
; X86-LABEL: @test(
; X86-NEXT: entry:
; X86-NEXT: [[A:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 5
; X86-NEXT: [[A1:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 6
; X86-NEXT: [[A2:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 7
; X86-NEXT: [[A3:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 8
; X86-NEXT: br label [[WHILE:%.*]]
; X86: while:
; X86-NEXT: [[PH:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX1:%.*]], [[WHILE]] ]
; X86-NEXT: [[TMP0:%.*]] = load i64, ptr null, align 8
; X86-NEXT: [[TMP1:%.*]] = load i64, ptr null, align 8
; X86-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[A2]], align 8
; X86-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[A]], align 8
; X86-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; X86-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr [[A1]], align 16
; X86-NEXT: [[TMP6:%.*]] = load i64, ptr [[A3]], align 16
; X86-NEXT: [[TMP7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP1]], i32 0
; X86-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; X86-NEXT: [[TMP9:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> [[TMP12]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; X86-NEXT: [[TMP10:%.*]] = insertelement <8 x i64> [[TMP9]], i64 [[TMP0]], i32 3
; X86-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; X86-NEXT: [[TMP16:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; X86-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; X86-NEXT: [[TMP17:%.*]] = shufflevector <8 x i64> [[TMP16]], <8 x i64> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; X86-NEXT: [[TMP14:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 3, i32 4, i32 5, i32 6, i32 8>
; X86-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> [[TMP14]])
; X86-NEXT: [[OP_RDX:%.*]] = xor i64 [[TMP15]], [[TMP6]]
; X86-NEXT: [[OP_RDX1]] = xor i64 [[OP_RDX]], [[TMP6]]
; X86-NEXT: br label [[WHILE]]
;
; AARCH64-LABEL: @test(
; AARCH64-NEXT: entry:
; AARCH64-NEXT: [[A:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 5
; AARCH64-NEXT: [[A1:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 6
; AARCH64-NEXT: [[A2:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 7
; AARCH64-NEXT: [[A3:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 8
; AARCH64-NEXT: br label [[WHILE:%.*]]
; AARCH64: while:
; AARCH64-NEXT: [[PH:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX5:%.*]], [[WHILE]] ]
; AARCH64-NEXT: [[TMP0:%.*]] = load i64, ptr null, align 8
; AARCH64-NEXT: [[TMP2:%.*]] = load i64, ptr null, align 8
; AARCH64-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[A2]], align 8
; AARCH64-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr [[A]], align 8
; AARCH64-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; AARCH64-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[A1]], align 16
; AARCH64-NEXT: [[TMP6:%.*]] = load i64, ptr [[A3]], align 16
; AARCH64-NEXT: [[TMP7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP2]], i32 0
; AARCH64-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AARCH64-NEXT: [[TMP15:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> [[TMP9]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AARCH64-NEXT: [[TMP10:%.*]] = insertelement <8 x i64> [[TMP15]], i64 [[TMP0]], i32 3
; AARCH64-NEXT: [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AARCH64-NEXT: [[TMP17:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; AARCH64-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AARCH64-NEXT: [[TMP14:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; AARCH64-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP14]], <8 x i64> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 3, i32 4, i32 5, i32 6, i32 8>
; AARCH64-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> [[TMP11]])
; AARCH64-NEXT: [[OP_RDX:%.*]] = xor i64 [[TMP12]], [[TMP6]]
; AARCH64-NEXT: [[OP_RDX5]] = xor i64 [[OP_RDX]], [[TMP6]]
; AARCH64-NEXT: br label [[WHILE]]
;
entry:
%a = getelementptr [1000 x i64], ptr null, i64 0, i64 5

View File

@ -19,8 +19,7 @@ define void @func(i32 %0) {
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i32> [[TMP16]], <32 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 32, i32 33, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 poison, i32 poison, i32 30, i32 31>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <32 x i32> [[TMP14]], <32 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 32, i32 33, i32 30, i32 31>
; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i32> [[TMP8]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = sext <32 x i32> [[TMP18]] to <32 x i64>
; CHECK-NEXT: [[TMP20:%.*]] = icmp slt <32 x i64> [[TMP19]], zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = icmp slt <32 x i32> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <32 x i1> [[TMP20]], i32 31
; CHECK-NEXT: [[TMP22:%.*]] = and i1 false, [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i1> [[TMP20]], i32 30