[SLP] Make getSameOpcode support interchangeable instructions. (#135797)

We use the term "interchangeable instructions" to refer to different
operators that have the same meaning (e.g., `add x, 0` is equivalent to
`mul x, 1`).
Non-constant values are not supported, as they may incur high costs with
little benefit.

---------

Co-authored-by: Alexey Bataev <a.bataev@gmx.com>
This commit is contained in:
Han-Kuan Chen 2025-04-16 00:08:59 +08:00 committed by GitHub
parent 289baf1f42
commit d41e517748
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 647 additions and 201 deletions

View File

@ -599,6 +599,28 @@ static std::optional<unsigned> getElementIndex(const Value *Inst,
return Index; return Index;
} }
/// Checks whether every value of \p VL, starting from its first instruction,
/// has the same opcode as that instruction.
/// Comparison instructions must additionally agree on the predicate, and
/// PoisonValues are accepted as matching anything.
/// Only the literal opcode is compared; interchangeable instructions are not
/// taken into account.
/// \returns true if \p VL holds no instruction at all or all values match.
static bool allSameOpcode(ArrayRef<Value *> VL) {
  auto *FirstInstIt = find_if(VL, IsaPred<Instruction>);
  // Nothing to compare against, trivially uniform.
  if (FirstInstIt == VL.end())
    return true;

  auto *Leader = cast<Instruction>(*FirstInstIt);
  const unsigned LeaderOpcode = Leader->getOpcode();
  // Stays at the sentinel value when the leader is not a comparison.
  CmpInst::Predicate LeaderPred = CmpInst::BAD_ICMP_PREDICATE;
  if (isa<CmpInst>(Leader))
    LeaderPred = cast<CmpInst>(Leader)->getPredicate();

  for (auto *Cur = FirstInstIt, *End = VL.end(); Cur != End; ++Cur) {
    Value *V = *Cur;
    if (auto *Cmp = dyn_cast<CmpInst>(V)) {
      if (Cmp->getPredicate() != LeaderPred)
        return false;
      continue;
    }
    if (auto *Inst = dyn_cast<Instruction>(V)) {
      if (Inst->getOpcode() != LeaderOpcode)
        return false;
      continue;
    }
    // Any non-instruction value must be poison to be tolerated.
    if (!isa<PoisonValue>(V))
      return false;
  }
  return true;
}
namespace { namespace {
/// Specifies the way the mask should be analyzed for undefs/poisonous elements /// Specifies the way the mask should be analyzed for undefs/poisonous elements
/// in the shuffle mask. /// in the shuffle mask.
@ -814,6 +836,272 @@ static std::optional<unsigned> getExtractIndex(const Instruction *E) {
} }
namespace { namespace {
/// Tells whether \p Opcode may take part in a main/alternate instruction pair
/// during SLP vectorization.
///
/// Integer division and remainder are rejected: e.g. an SDIV executed for a
/// lane that is later "shuffled out" could still divide by zero and cause UB.
/// \returns false for integer div/rem opcodes, true for everything else.
bool isValidForAlternation(unsigned Opcode) {
  if (Instruction::isIntDivRem(Opcode))
    return false;
  return true;
}
/// Helper class that determines VL can use the same opcode.
/// Alternate instruction is supported. In addition, it supports interchangeable
/// instruction. An interchangeable instruction is an instruction that can be
/// converted to another instruction with same semantics. For example, x << 1 is
/// equal to x * 2. x * 1 is equal to x | 0.
class BinOpSameOpcodeHelper {
using MaskType = std::uint_fast16_t;
/// Sort SupportedOp because it is used by binary_search.
constexpr static std::initializer_list<unsigned> SupportedOp = {
Instruction::Add, Instruction::Sub, Instruction::Mul, Instruction::Shl,
Instruction::AShr, Instruction::And, Instruction::Or, Instruction::Xor};
enum : MaskType {
ShlBIT = 0b1,
AShrBIT = 0b10,
MulBIT = 0b100,
AddBIT = 0b1000,
SubBIT = 0b10000,
AndBIT = 0b100000,
OrBIT = 0b1000000,
XorBIT = 0b10000000,
MainOpBIT = 0b100000000,
LLVM_MARK_AS_BITMASK_ENUM(MainOpBIT)
};
/// Return a non-nullptr if either operand of I is a ConstantInt.
/// The second return value represents the operand position. We check the
/// right-hand side first (1). If the right hand side is not a ConstantInt and
/// the instruction is neither Sub, Shl, nor AShr, we then check the left hand
/// side (0).
static std::pair<ConstantInt *, unsigned>
isBinOpWithConstantInt(const Instruction *I) {
unsigned Opcode = I->getOpcode();
assert(binary_search(SupportedOp, Opcode) && "Unsupported opcode.");
(void)SupportedOp;
auto *BinOp = cast<BinaryOperator>(I);
if (auto *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)))
return {CI, 1};
if (Opcode == Instruction::Sub || Opcode == Instruction::Shl ||
Opcode == Instruction::AShr)
return {nullptr, 0};
if (auto *CI = dyn_cast<ConstantInt>(BinOp->getOperand(0)))
return {CI, 0};
return {nullptr, 0};
}
struct InterchangeableInfo {
const Instruction *I = nullptr;
/// The bit it sets represents whether MainOp can be converted to.
MaskType Mask = MainOpBIT | XorBIT | OrBIT | AndBIT | SubBIT | AddBIT |
MulBIT | AShrBIT | ShlBIT;
/// We cannot create an interchangeable instruction that does not exist in
/// VL. For example, VL [x + 0, y * 1] can be converted to [x << 0, y << 0],
/// but << does not exist in VL. In the end, we convert VL to [x * 1, y *
/// 1]. SeenBefore is used to know what operations have been seen before.
MaskType SeenBefore = 0;
InterchangeableInfo(const Instruction *I) : I(I) {}
/// Return false allows BinOpSameOpcodeHelper to find an alternate
/// instruction. Directly setting the mask will destroy the mask state,
/// preventing us from determining which instruction it should convert to.
bool trySet(MaskType OpcodeInMaskForm, MaskType InterchangeableMask) {
if (Mask & InterchangeableMask) {
SeenBefore |= OpcodeInMaskForm;
Mask &= InterchangeableMask;
return true;
}
return false;
}
bool equal(unsigned Opcode) {
if (Opcode == I->getOpcode())
return trySet(MainOpBIT, MainOpBIT);
return false;
}
unsigned getOpcode() const {
MaskType Candidate = Mask & SeenBefore;
if (Candidate & MainOpBIT)
return I->getOpcode();
if (Candidate & ShlBIT)
return Instruction::Shl;
if (Candidate & AShrBIT)
return Instruction::AShr;
if (Candidate & MulBIT)
return Instruction::Mul;
if (Candidate & AddBIT)
return Instruction::Add;
if (Candidate & SubBIT)
return Instruction::Sub;
if (Candidate & AndBIT)
return Instruction::And;
if (Candidate & OrBIT)
return Instruction::Or;
if (Candidate & XorBIT)
return Instruction::Xor;
llvm_unreachable("Cannot find interchangeable instruction.");
}
SmallVector<Value *> getOperand(const Instruction *To) const {
unsigned ToOpcode = To->getOpcode();
unsigned FromOpcode = I->getOpcode();
if (FromOpcode == ToOpcode)
return SmallVector<Value *>(I->operands());
assert(binary_search(SupportedOp, ToOpcode) && "Unsupported opcode.");
auto [CI, Pos] = isBinOpWithConstantInt(I);
const APInt &FromCIValue = CI->getValue();
unsigned FromCIValueBitWidth = FromCIValue.getBitWidth();
APInt ToCIValue;
switch (FromOpcode) {
case Instruction::Shl:
if (ToOpcode == Instruction::Mul) {
ToCIValue = APInt::getOneBitSet(FromCIValueBitWidth,
FromCIValue.getZExtValue());
} else {
assert(FromCIValue.isZero() && "Cannot convert the instruction.");
ToCIValue = ToOpcode == Instruction::And
? APInt::getAllOnes(FromCIValueBitWidth)
: APInt::getZero(FromCIValueBitWidth);
}
break;
case Instruction::Mul:
assert(FromCIValue.isPowerOf2() && "Cannot convert the instruction.");
if (ToOpcode == Instruction::Shl) {
ToCIValue = APInt(FromCIValueBitWidth, FromCIValue.logBase2());
} else {
assert(FromCIValue.isOne() && "Cannot convert the instruction.");
ToCIValue = ToOpcode == Instruction::And
? APInt::getAllOnes(FromCIValueBitWidth)
: APInt::getZero(FromCIValueBitWidth);
}
break;
case Instruction::Add:
case Instruction::Sub:
if (FromCIValue.isZero()) {
ToCIValue = APInt::getZero(FromCIValueBitWidth);
} else {
assert(is_contained({Instruction::Add, Instruction::Sub}, ToOpcode) &&
"Cannot convert the instruction.");
ToCIValue = FromCIValue;
ToCIValue.negate();
}
break;
case Instruction::And:
assert(FromCIValue.isAllOnes() && "Cannot convert the instruction.");
ToCIValue = ToOpcode == Instruction::Mul
? APInt::getOneBitSet(FromCIValueBitWidth, 0)
: APInt::getZero(FromCIValueBitWidth);
break;
default:
assert(FromCIValue.isZero() && "Cannot convert the instruction.");
ToCIValue = APInt::getZero(FromCIValueBitWidth);
break;
}
Value *LHS = I->getOperand(1 - Pos);
Constant *RHS =
ConstantInt::get(I->getOperand(Pos)->getType(), ToCIValue);
if (Pos == 1)
return SmallVector<Value *>({LHS, RHS});
return SmallVector<Value *>({RHS, LHS});
}
};
InterchangeableInfo MainOp;
InterchangeableInfo AltOp;
bool isValidForAlternation(const Instruction *I) const {
return ::isValidForAlternation(MainOp.I->getOpcode()) &&
::isValidForAlternation(I->getOpcode());
}
bool initializeAltOp(const Instruction *I) {
if (AltOp.I)
return true;
if (!isValidForAlternation(I))
return false;
AltOp.I = I;
return true;
}
public:
BinOpSameOpcodeHelper(const Instruction *MainOp,
const Instruction *AltOp = nullptr)
: MainOp(MainOp), AltOp(AltOp) {
assert(is_sorted(SupportedOp) && "SupportedOp is not sorted.");
}
bool add(const Instruction *I) {
assert(isa<BinaryOperator>(I) &&
"BinOpSameOpcodeHelper only accepts BinaryOperator.");
unsigned Opcode = I->getOpcode();
MaskType OpcodeInMaskForm;
// Prefer Shl, AShr, Mul, Add, Sub, And, Or and Xor over MainOp.
switch (Opcode) {
case Instruction::Shl:
OpcodeInMaskForm = ShlBIT;
break;
case Instruction::AShr:
OpcodeInMaskForm = AShrBIT;
break;
case Instruction::Mul:
OpcodeInMaskForm = MulBIT;
break;
case Instruction::Add:
OpcodeInMaskForm = AddBIT;
break;
case Instruction::Sub:
OpcodeInMaskForm = SubBIT;
break;
case Instruction::And:
OpcodeInMaskForm = AndBIT;
break;
case Instruction::Or:
OpcodeInMaskForm = OrBIT;
break;
case Instruction::Xor:
OpcodeInMaskForm = XorBIT;
break;
default:
return MainOp.equal(Opcode) ||
(initializeAltOp(I) && AltOp.equal(Opcode));
}
MaskType InterchangeableMask = OpcodeInMaskForm;
ConstantInt *CI = isBinOpWithConstantInt(I).first;
if (CI) {
constexpr MaskType CanBeAll =
XorBIT | OrBIT | AndBIT | SubBIT | AddBIT | MulBIT | AShrBIT | ShlBIT;
const APInt &CIValue = CI->getValue();
switch (Opcode) {
case Instruction::Shl:
if (CIValue.ult(CIValue.getBitWidth()))
InterchangeableMask = CIValue.isZero() ? CanBeAll : MulBIT | ShlBIT;
break;
case Instruction::Mul:
if (CIValue.isOne()) {
InterchangeableMask = CanBeAll;
break;
}
if (CIValue.isPowerOf2())
InterchangeableMask = MulBIT | ShlBIT;
break;
case Instruction::Add:
case Instruction::Sub:
InterchangeableMask = CIValue.isZero() ? CanBeAll : SubBIT | AddBIT;
break;
case Instruction::And:
if (CIValue.isAllOnes())
InterchangeableMask = CanBeAll;
break;
default:
if (CIValue.isZero())
InterchangeableMask = CanBeAll;
break;
}
}
return MainOp.trySet(OpcodeInMaskForm, InterchangeableMask) ||
(initializeAltOp(I) &&
AltOp.trySet(OpcodeInMaskForm, InterchangeableMask));
}
unsigned getMainOpcode() const { return MainOp.getOpcode(); }
bool hasAltOp() const { return AltOp.I; }
unsigned getAltOpcode() const {
return hasAltOp() ? AltOp.getOpcode() : getMainOpcode();
}
SmallVector<Value *> getOperand(const Instruction *I) const {
return MainOp.getOperand(I);
}
};
/// Main data required for vectorization of instructions. /// Main data required for vectorization of instructions.
class InstructionsState { class InstructionsState {
@ -861,9 +1149,27 @@ public:
/// Some of the instructions in the list have alternate opcodes. /// Some of the instructions in the list have alternate opcodes.
bool isAltShuffle() const { return getMainOp() != getAltOp(); } bool isAltShuffle() const { return getMainOp() != getAltOp(); }
bool isOpcodeOrAlt(Instruction *I) const { /// Checks if the instruction matches either the main or alternate opcode.
unsigned CheckedOpcode = I->getOpcode(); /// \returns
return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode; /// - MainOp if \param I matches MainOp's opcode directly or can be converted
/// to it
/// - AltOp if \param I matches AltOp's opcode directly or can be converted to
/// it
/// - nullptr if \param I cannot be matched or converted to either opcode
Instruction *getMatchingMainOpOrAltOp(Instruction *I) const {
assert(MainOp && "MainOp cannot be nullptr.");
if (I->getOpcode() == MainOp->getOpcode())
return MainOp;
// Prefer AltOp instead of interchangeable instruction of MainOp.
assert(AltOp && "AltOp cannot be nullptr.");
if (I->getOpcode() == AltOp->getOpcode())
return AltOp;
if (!I->isBinaryOp())
return nullptr;
BinOpSameOpcodeHelper Converter(MainOp);
if (Converter.add(I) && Converter.add(MainOp) && !Converter.hasAltOp())
return MainOp;
return AltOp;
} }
/// Checks if main/alt instructions are shift operations. /// Checks if main/alt instructions are shift operations.
@ -913,23 +1219,41 @@ public:
static InstructionsState invalid() { return {nullptr, nullptr}; } static InstructionsState invalid() { return {nullptr, nullptr}; }
}; };
} // end anonymous namespace std::pair<Instruction *, SmallVector<Value *>>
convertTo(Instruction *I, const InstructionsState &S) {
/// \returns true if \p Opcode is allowed as part of the main/alternate Instruction *SelectedOp = S.getMatchingMainOpOrAltOp(I);
/// instruction for SLP vectorization. assert(SelectedOp && "Cannot convert the instruction.");
/// if (I->isBinaryOp()) {
/// Example of unsupported opcode is SDIV that can potentially cause UB if the BinOpSameOpcodeHelper Converter(I);
/// "shuffled out" lane would result in division by zero. return std::make_pair(SelectedOp, Converter.getOperand(SelectedOp));
static bool isValidForAlternation(unsigned Opcode) {
if (Instruction::isIntDivRem(Opcode))
return false;
return true;
} }
return std::make_pair(SelectedOp, SmallVector<Value *>(I->operands()));
}
} // end anonymous namespace
static InstructionsState getSameOpcode(ArrayRef<Value *> VL, static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
const TargetLibraryInfo &TLI); const TargetLibraryInfo &TLI);
/// Find an instruction with a specific opcode in VL.
/// \param VL Array of values to search through. Must contain only Instructions
/// and PoisonValues.
/// \param Opcode The instruction opcode to search for
/// \returns
/// - The first instruction found with matching opcode
/// - nullptr if no matching instruction is found
///
/// The function is file-local, so it is declared static like the other
/// helpers defined outside of the anonymous namespace.
static Instruction *findInstructionWithOpcode(ArrayRef<Value *> VL,
                                              unsigned Opcode) {
  for (Value *V : VL) {
    // Poison lanes carry no opcode and are skipped.
    if (isa<PoisonValue>(V))
      continue;
    assert(isa<Instruction>(V) && "Only accepts PoisonValue and Instruction.");
    auto *Inst = cast<Instruction>(V);
    if (Inst->getOpcode() == Opcode)
      return Inst;
  }
  return nullptr;
}
/// Checks if the provided operands of 2 cmp instructions are compatible, i.e. /// Checks if the provided operands of 2 cmp instructions are compatible, i.e.
/// compatible instructions or constants, or just some other regular values. /// compatible instructions or constants, or just some other regular values.
static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0, static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
@ -993,6 +1317,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
unsigned Opcode = MainOp->getOpcode(); unsigned Opcode = MainOp->getOpcode();
unsigned AltOpcode = Opcode; unsigned AltOpcode = Opcode;
BinOpSameOpcodeHelper BinOpHelper(MainOp);
bool SwappedPredsCompatible = IsCmpOp && [&]() { bool SwappedPredsCompatible = IsCmpOp && [&]() {
SetVector<unsigned> UniquePreds, UniqueNonSwappedPreds; SetVector<unsigned> UniquePreds, UniqueNonSwappedPreds;
UniquePreds.insert(BasePred); UniquePreds.insert(BasePred);
@ -1039,14 +1364,8 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid(); return InstructionsState::invalid();
unsigned InstOpcode = I->getOpcode(); unsigned InstOpcode = I->getOpcode();
if (IsBinOp && isa<BinaryOperator>(I)) { if (IsBinOp && isa<BinaryOperator>(I)) {
if (InstOpcode == Opcode || InstOpcode == AltOpcode) if (BinOpHelper.add(I))
continue; continue;
if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
isValidForAlternation(Opcode)) {
AltOpcode = InstOpcode;
AltOp = I;
continue;
}
} else if (IsCastOp && isa<CastInst>(I)) { } else if (IsCastOp && isa<CastInst>(I)) {
Value *Op0 = MainOp->getOperand(0); Value *Op0 = MainOp->getOperand(0);
Type *Ty0 = Op0->getType(); Type *Ty0 = Op0->getType();
@ -1147,7 +1466,22 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid(); return InstructionsState::invalid();
} }
return InstructionsState(MainOp, AltOp); if (IsBinOp) {
MainOp = findInstructionWithOpcode(VL, BinOpHelper.getMainOpcode());
assert(MainOp && "Cannot find MainOp with Opcode from BinOpHelper.");
AltOp = findInstructionWithOpcode(VL, BinOpHelper.getAltOpcode());
assert(MainOp && "Cannot find AltOp with Opcode from BinOpHelper.");
}
assert((MainOp == AltOp || !allSameOpcode(VL)) &&
"Incorrect implementation of allSameOpcode.");
InstructionsState S(MainOp, AltOp);
assert(all_of(VL,
[&](Value *V) {
return isa<PoisonValue>(V) ||
S.getMatchingMainOpOrAltOp(cast<Instruction>(V));
}) &&
"Invalid InstructionsState.");
return S;
} }
/// \returns true if all of the values in \p VL have the same type or false /// \returns true if all of the values in \p VL have the same type or false
@ -2560,11 +2894,11 @@ public:
// Since operand reordering is performed on groups of commutative // Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely tell // operations or alternating sequences (e.g., +, -), we can safely tell
// the inverse operations by checking commutativity. // the inverse operations by checking commutativity.
bool IsInverseOperation = !isCommutative(cast<Instruction>(V)); auto [SelectedOp, Ops] = convertTo(cast<Instruction>(VL[Lane]), S);
bool IsInverseOperation = !isCommutative(SelectedOp);
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
bool APO = (OpIdx == 0) ? false : IsInverseOperation; bool APO = (OpIdx == 0) ? false : IsInverseOperation;
OpsVec[OpIdx][Lane] = {cast<Instruction>(V)->getOperand(OpIdx), APO, OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
false};
} }
} }
} }
@ -3542,14 +3876,16 @@ private:
/// Some of the instructions in the list have alternate opcodes. /// Some of the instructions in the list have alternate opcodes.
bool isAltShuffle() const { return S.isAltShuffle(); } bool isAltShuffle() const { return S.isAltShuffle(); }
bool isOpcodeOrAlt(Instruction *I) const { return S.isOpcodeOrAlt(I); } Instruction *getMatchingMainOpOrAltOp(Instruction *I) const {
return S.getMatchingMainOpOrAltOp(I);
}
/// Chooses the correct key for scheduling data. If \p Op has the same (or /// Chooses the correct key for scheduling data. If \p Op has the same (or
/// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is /// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is
/// \p OpValue. /// \p OpValue.
Value *isOneOf(Value *Op) const { Value *isOneOf(Value *Op) const {
auto *I = dyn_cast<Instruction>(Op); auto *I = dyn_cast<Instruction>(Op);
if (I && isOpcodeOrAlt(I)) if (I && getMatchingMainOpOrAltOp(I))
return Op; return Op;
return S.getMainOp(); return S.getMainOp();
} }
@ -8428,11 +8764,15 @@ static std::pair<size_t, size_t> generateKeySubkey(
return std::make_pair(Key, SubKey); return std::make_pair(Key, SubKey);
} }
/// Checks if the specified instruction \p I is a main operation for the given
/// \p MainOp and \p AltOp instructions.
static bool isMainInstruction(Instruction *I, Instruction *MainOp,
Instruction *AltOp, const TargetLibraryInfo &TLI);
/// Checks if the specified instruction \p I is an alternate operation for /// Checks if the specified instruction \p I is an alternate operation for
/// the given \p MainOp and \p AltOp instructions. /// the given \p MainOp and \p AltOp instructions.
static bool isAlternateInstruction(const Instruction *I, static bool isAlternateInstruction(Instruction *I, Instruction *MainOp,
const Instruction *MainOp, Instruction *AltOp,
const Instruction *AltOp,
const TargetLibraryInfo &TLI); const TargetLibraryInfo &TLI);
bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S, bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
@ -9245,7 +9585,8 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
continue; continue;
} }
if ((LocalState.getAltOpcode() != LocalState.getOpcode() && if ((LocalState.getAltOpcode() != LocalState.getOpcode() &&
I->getOpcode() == LocalState.getOpcode()) || isMainInstruction(I, LocalState.getMainOp(), LocalState.getAltOp(),
*TLI)) ||
(LocalState.getAltOpcode() == LocalState.getOpcode() && (LocalState.getAltOpcode() == LocalState.getOpcode() &&
!isAlternateInstruction(I, LocalState.getMainOp(), !isAlternateInstruction(I, LocalState.getMainOp(),
LocalState.getAltOp(), *TLI))) { LocalState.getAltOp(), *TLI))) {
@ -10344,9 +10685,14 @@ void BoUpSLP::TreeEntry::buildAltOpShuffleMask(
} }
} }
static bool isAlternateInstruction(const Instruction *I, static bool isMainInstruction(Instruction *I, Instruction *MainOp,
const Instruction *MainOp, Instruction *AltOp,
const Instruction *AltOp, const TargetLibraryInfo &TLI) {
return InstructionsState(MainOp, AltOp).getMatchingMainOpOrAltOp(I) == MainOp;
}
static bool isAlternateInstruction(Instruction *I, Instruction *MainOp,
Instruction *AltOp,
const TargetLibraryInfo &TLI) { const TargetLibraryInfo &TLI) {
if (auto *MainCI = dyn_cast<CmpInst>(MainOp)) { if (auto *MainCI = dyn_cast<CmpInst>(MainOp)) {
auto *AltCI = cast<CmpInst>(AltOp); auto *AltCI = cast<CmpInst>(AltOp);
@ -10366,7 +10712,7 @@ static bool isAlternateInstruction(const Instruction *I,
"their swap."); "their swap.");
return MainP != P && MainP != SwappedP; return MainP != P && MainP != SwappedP;
} }
return I->getOpcode() == AltOp->getOpcode(); return InstructionsState(MainOp, AltOp).getMatchingMainOpOrAltOp(I) == AltOp;
} }
TTI::OperandValueInfo BoUpSLP::getOperandInfo(ArrayRef<Value *> Ops) { TTI::OperandValueInfo BoUpSLP::getOperandInfo(ArrayRef<Value *> Ops) {
@ -11129,7 +11475,9 @@ void BoUpSLP::transformNodes() {
// same opcode and same parent block or all constants. // same opcode and same parent block or all constants.
if (VL.size() <= 2 || LoadEntriesToVectorize.contains(Idx) || if (VL.size() <= 2 || LoadEntriesToVectorize.contains(Idx) ||
!(!E.hasState() || E.getOpcode() == Instruction::Load || !(!E.hasState() || E.getOpcode() == Instruction::Load ||
E.isAltShuffle() || !allSameBlock(VL)) || // We use allSameOpcode instead of isAltShuffle because we don't
// want to use interchangeable instruction here.
!allSameOpcode(VL) || !allSameBlock(VL)) ||
allConstant(VL) || isSplat(VL)) allConstant(VL) || isSplat(VL))
continue; continue;
if (ForceLoadGather && E.hasState() && E.getOpcode() == Instruction::Load) if (ForceLoadGather && E.hasState() && E.getOpcode() == Instruction::Load)
@ -11174,7 +11522,7 @@ void BoUpSLP::transformNodes() {
if (IsSplat) if (IsSplat)
continue; continue;
InstructionsState S = getSameOpcode(Slice, *TLI); InstructionsState S = getSameOpcode(Slice, *TLI);
if (!S || S.isAltShuffle() || !allSameBlock(Slice) || if (!S || !allSameOpcode(Slice) || !allSameBlock(Slice) ||
(S.getOpcode() == Instruction::Load && (S.getOpcode() == Instruction::Load &&
areKnownNonVectorizableLoads(Slice)) || areKnownNonVectorizableLoads(Slice)) ||
(S.getOpcode() != Instruction::Load && (S.getOpcode() != Instruction::Load &&
@ -12974,14 +13322,22 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (isa<PoisonValue>(UniqueValues[Idx])) if (isa<PoisonValue>(UniqueValues[Idx]))
return InstructionCost(TTI::TCC_Free); return InstructionCost(TTI::TCC_Free);
auto *VI = cast<Instruction>(UniqueValues[Idx]); // We cannot retrieve the operand from UniqueValues[Idx] because an
unsigned OpIdx = isa<UnaryOperator>(VI) ? 0 : 1; // interchangeable instruction may be used. The order and the actual
TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(VI->getOperand(0)); // operand might differ from what is retrieved from UniqueValues[Idx].
TTI::OperandValueInfo Op2Info = Value *Op1 = E->getOperand(0)[Idx];
TTI::getOperandInfo(VI->getOperand(OpIdx)); Value *Op2;
SmallVector<const Value *> Operands(VI->operand_values()); SmallVector<const Value *, 2> Operands(1, Op1);
if (isa<UnaryOperator>(UniqueValues[Idx])) {
Op2 = Op1;
} else {
Op2 = E->getOperand(1)[Idx];
Operands.push_back(Op2);
}
TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Op1);
TTI::OperandValueInfo Op2Info = TTI::getOperandInfo(Op2);
return TTI->getArithmeticInstrCost(ShuffleOrOp, OrigScalarTy, CostKind, return TTI->getArithmeticInstrCost(ShuffleOrOp, OrigScalarTy, CostKind,
Op1Info, Op2Info, Operands, VI); Op1Info, Op2Info, Operands);
}; };
auto GetVectorCost = [=](InstructionCost CommonCost) { auto GetVectorCost = [=](InstructionCost CommonCost) {
if (ShuffleOrOp == Instruction::And && It != MinBWs.end()) { if (ShuffleOrOp == Instruction::And && It != MinBWs.end()) {
@ -13211,7 +13567,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
return InstructionCost(TTI::TCC_Free); return InstructionCost(TTI::TCC_Free);
auto *VI = cast<Instruction>(UniqueValues[Idx]); auto *VI = cast<Instruction>(UniqueValues[Idx]);
assert(E->isOpcodeOrAlt(VI) && "Unexpected main/alternate opcode"); assert(E->getMatchingMainOpOrAltOp(VI) &&
"Unexpected main/alternate opcode");
(void)E; (void)E;
return TTI->getInstructionCost(VI, CostKind); return TTI->getInstructionCost(VI, CostKind);
}; };
@ -13279,7 +13636,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
SmallVector<int> Mask; SmallVector<int> Mask;
E->buildAltOpShuffleMask( E->buildAltOpShuffleMask(
[&](Instruction *I) { [&](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); assert(E->getMatchingMainOpOrAltOp(I) &&
"Unexpected main/alternate opcode");
return isAlternateInstruction(I, E->getMainOp(), E->getAltOp(), return isAlternateInstruction(I, E->getMainOp(), E->getAltOp(),
*TLI); *TLI);
}, },
@ -15441,7 +15799,8 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
!isa<GetElementPtrInst>(V)) !isa<GetElementPtrInst>(V))
return true; return true;
auto *I = dyn_cast<Instruction>(V); auto *I = dyn_cast<Instruction>(V);
return !I || !E->isOpcodeOrAlt(I) || I->getParent() == BB || return !I || !E->getMatchingMainOpOrAltOp(I) ||
I->getParent() == BB ||
isVectorLikeInstWithConstOps(I); isVectorLikeInstWithConstOps(I);
})) && })) &&
"Expected gathered loads or GEPs or instructions from same basic " "Expected gathered loads or GEPs or instructions from same basic "
@ -17585,7 +17944,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *V = Builder.CreateBinOp( Value *V = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
RHS); RHS);
propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end()); propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());
if (auto *I = dyn_cast<Instruction>(V)) { if (auto *I = dyn_cast<Instruction>(V)) {
V = ::propagateMetadata(I, E->Scalars); V = ::propagateMetadata(I, E->Scalars);
// Drop nuw flags for abs(sub(commutative), true). // Drop nuw flags for abs(sub(commutative), true).
@ -18005,7 +18364,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
SmallVector<int> Mask; SmallVector<int> Mask;
E->buildAltOpShuffleMask( E->buildAltOpShuffleMask(
[E, this](Instruction *I) { [E, this](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); assert(E->getMatchingMainOpOrAltOp(I) &&
"Unexpected main/alternate opcode");
return isAlternateInstruction(I, E->getMainOp(), E->getAltOp(), return isAlternateInstruction(I, E->getMainOp(), E->getAltOp(),
*TLI); *TLI);
}, },
@ -21796,7 +22156,7 @@ public:
// Also check if the instruction was folded to constant/other value. // Also check if the instruction was folded to constant/other value.
auto *Inst = dyn_cast<Instruction>(RdxVal); auto *Inst = dyn_cast<Instruction>(RdxVal);
if ((Inst && isVectorLikeInstWithConstOps(Inst) && if ((Inst && isVectorLikeInstWithConstOps(Inst) &&
(!S || !S.isOpcodeOrAlt(Inst))) || (!S || !S.getMatchingMainOpOrAltOp(Inst))) ||
(S && !Inst)) (S && !Inst))
continue; continue;
Candidates.push_back(RdxVal); Candidates.push_back(RdxVal);

View File

@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
; ;
; POW2-ONLY-LABEL: @store_try_reorder( ; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry: ; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0 ; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887:%.*]], align 4
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1 ; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[ARRAYIDX_I1887]], i64 2
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4 ; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: ret void ; POW2-ONLY-NEXT: ret void
; ;
entry: entry:

View File

@ -7,13 +7,12 @@ define void @test(ptr %a, i64 %0) {
; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
; CHECK-NEXT: br label %[[BB:.*]] ; CHECK-NEXT: br label %[[BB:.*]]
; CHECK: [[BB]]: ; CHECK: [[BB]]:
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison) ; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison)
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8

View File

@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
; ;
; POW2-ONLY-LABEL: @store_try_reorder( ; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry: ; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0 ; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887:%.*]], align 4
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1 ; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[ARRAYIDX_I1887]], i64 2
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4 ; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: ret void ; POW2-ONLY-NEXT: ret void
; ;
entry: entry:

View File

@ -0,0 +1,36 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -S %s | FileCheck %s
;
; Runs the SLP vectorizer over a chain of llvm.umin.i64 calls whose operands
; are lshr results fed by mul/sub/add instructions with constant operands.
; The expected output below contains only scalar i64 instructions. The leading
; `mul i64 0, 0` has no counterpart in the expected output, and the first lshr
; is expected to take the constant 0 directly.
; NOTE(review): presumably a regression test for mixing mul/sub/add in one
; bundle (interchangeable opcodes) - confirm against the originating commit.
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 0, 0
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, 1
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 0
; CHECK-NEXT: [[UMIN120:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP2]])
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 0, 0
; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP3]], 0
; CHECK-NEXT: [[UMIN122:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN120]], i64 [[TMP4]])
; CHECK-NEXT: [[TMP5:%.*]] = add i64 0, 1
; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 0
; CHECK-NEXT: [[UMIN123:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN122]], i64 [[TMP6]])
; CHECK-NEXT: [[UMIN124:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN123]], i64 0)
; CHECK-NEXT: ret void
;
entry:
; First lshr operand comes from a mul; the next three come from sub, sub, add.
%0 = mul i64 0, 0
%1 = lshr i64 %0, 0
%2 = sub i64 0, 1
%3 = lshr i64 %2, 0
; Each umin below folds the previous umin result with the next lshr result.
%umin120 = call i64 @llvm.umin.i64(i64 %1, i64 %3)
%4 = sub i64 0, 0
%5 = lshr i64 %4, 0
%umin122 = call i64 @llvm.umin.i64(i64 %umin120, i64 %5)
%6 = add i64 0, 1
%7 = lshr i64 %6, 0
%umin123 = call i64 @llvm.umin.i64(i64 %umin122, i64 %7)
; The last umin takes the constant 0 as its second operand, and its result is unused.
%umin124 = call i64 @llvm.umin.i64(i64 %umin123, i64 0)
ret void
}
; Declaration of the intrinsic called above.
declare i64 @llvm.umin.i64(i64, i64)

View File

@ -10,9 +10,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar() ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar()
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10> ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9) ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
; CHECK-NEXT: ret i32 undef ; CHECK-NEXT: ret i32 undef

View File

@ -10,15 +10,10 @@ define void @test(ptr %0, ptr %1, ptr %2) {
; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP8]]
; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]] ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0> ; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> <i32 0, i32 0, i32 1, i32 0>, [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 2, i32 0, i32 1, i32 7>
; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP17]], zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4 ; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;

View File

@ -8,15 +8,13 @@ define void @test() {
; CHECK: [[BB1:.*]]: ; CHECK: [[BB1:.*]]:
; CHECK-NEXT: br label %[[BB2:.*]] ; CHECK-NEXT: br label %[[BB2:.*]]
; CHECK: [[BB2]]: ; CHECK: [[BB2]]:
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP5:%.*]], %[[BB6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP4:%.*]], %[[BB6]] ]
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; CHECK: [[BB6]]: ; CHECK: [[BB6]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP4]] = mul <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]] ; CHECK-NEXT: [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]]
; CHECK-NEXT: br i1 false, label %[[BB2]], label %[[BB6]] ; CHECK-NEXT: br i1 false, label %[[BB2]], label %[[BB6]]

View File

@ -7,14 +7,12 @@ define i16 @test(i16 %v1, i16 %v2) {
; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V2]], i32 3 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V2]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V1]], i32 3 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V1]], i32 3
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i16> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i16> [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 poison, i32 3> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V1]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i16> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i16> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i16> [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i16> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP11]], zeroinitializer

View File

@ -4,17 +4,13 @@
define i64 @foo(i32 %tmp7) { define i64 @foo(i32 %tmp7) {
; CHECK-LABEL: @foo( ; CHECK-LABEL: @foo(
; CHECK-NEXT: bb: ; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP5:%.*]], i32 2 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP8:%.*]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP0]], <i32 0, i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 0, i32 poison>
; CHECK-NEXT: [[TMP24:%.*]] = sub i32 undef, 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 0>, <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 14, i32 poison, i32 poison, i32 7>
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 undef, i32 0>, i32 [[TMP24]], i32 4 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i32> [[TMP2]], i32 0, i32 5
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 0, i32 5 ; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP13]], [[TMP4]]
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 undef>, i32 [[TMP24]], i32 6 ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[TMP13]], [[TMP4]]
; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0) ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 poison, i32 2, i32 3, i32 poison, i32 14, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP1]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
@ -29,7 +25,7 @@ bb:
%tmp4 = xor i32 %tmp3, 0 %tmp4 = xor i32 %tmp3, 0
%tmp6 = sub i32 0, 0 %tmp6 = sub i32 0, 0
%tmp8 = sub i32 %tmp7, 0 %tmp8 = sub i32 %tmp7, 0
%tmp9 = sub nsw i32 0, undef %tmp9 = sub nsw i32 0, poison
%tmp10 = add nsw i32 0, %tmp6 %tmp10 = add nsw i32 0, %tmp6
%tmp11 = sub nsw i32 0, %tmp8 %tmp11 = sub nsw i32 0, %tmp8
%tmp12 = add i32 0, %tmp10 %tmp12 = add i32 0, %tmp10
@ -44,10 +40,10 @@ bb:
%tmp21 = add i32 %tmp20, %tmp17 %tmp21 = add i32 %tmp20, %tmp17
%tmp22 = sub i32 0, 0 %tmp22 = sub i32 0, 0
%tmp23 = add i32 0, 0 %tmp23 = add i32 0, 0
%tmp24 = sub i32 undef, 0 %tmp24 = sub i32 poison, 0
%tmp25 = add nsw i32 %tmp23, undef %tmp25 = add nsw i32 %tmp23, poison
%tmp26 = add nsw i32 %tmp24, %tmp22 %tmp26 = add nsw i32 %tmp24, %tmp22
%tmp27 = sub nsw i32 undef, %tmp24 %tmp27 = sub nsw i32 poison, %tmp24
%tmp28 = add i32 0, %tmp25 %tmp28 = add i32 0, %tmp25
%tmp29 = xor i32 %tmp28, 0 %tmp29 = xor i32 %tmp28, 0
%tmp30 = add i32 0, %tmp26 %tmp30 = add i32 0, %tmp26
@ -58,7 +54,7 @@ bb:
%tmp35 = add i32 %tmp34, %tmp29 %tmp35 = add i32 %tmp34, %tmp29
%tmp36 = add i32 %tmp35, 0 %tmp36 = add i32 %tmp35, 0
%tmp37 = add i32 %tmp36, %tmp33 %tmp37 = add i32 %tmp36, %tmp33
%tmp38 = sub nsw i32 0, undef %tmp38 = sub nsw i32 0, poison
%tmp39 = add i32 0, %tmp38 %tmp39 = add i32 0, %tmp38
%tmp40 = xor i32 %tmp39, 0 %tmp40 = xor i32 %tmp39, 0
%tmp41 = add i32 0, %tmp37 %tmp41 = add i32 0, %tmp37

View File

@ -9,9 +9,7 @@ define i32 @foo(ptr nocapture %A, i32 %n, i32 %m) {
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10> ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9) ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0

View File

@ -6,26 +6,26 @@
define i64 @foo() { define i64 @foo() {
; CHECK-LABEL: define i64 @foo() { ; CHECK-LABEL: define i64 @foo() {
; CHECK-NEXT: bb: ; CHECK-NEXT: bb:
; CHECK-NEXT: [[ADD7:%.*]] = add i64 0, 0
; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb1: ; CHECK: bb1:
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ADD:%.*]], [[BB3]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
; CHECK-NEXT: [[PHI2:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB3]] ]
; CHECK-NEXT: ret i64 0 ; CHECK-NEXT: ret i64 0
; CHECK: bb3: ; CHECK: bb3:
; CHECK-NEXT: [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ] ; CHECK-NEXT: [[PHI4:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP3:%.*]], [[BB3]] ]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI4]], i32 0
; CHECK-NEXT: [[ADD]] = add i64 [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP3]] = add <2 x i64> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0 ; CHECK-NEXT: [[TMP5]] = add <2 x i64> [[TMP0]], [[TMP2]]
; CHECK-NEXT: [[TMP9]] = or i64 [[PHI5]], 0 ; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0 ; CHECK-NEXT: [[OR:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP7]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0 ; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[OR]], 0
; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]] ; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
; ;
; FORCED-LABEL: define i64 @foo() { ; FORCED-LABEL: define i64 @foo() {
; FORCED-NEXT: bb: ; FORCED-NEXT: bb:
; FORCED-NEXT: [[TMP8:%.*]] = add i64 0, 0 ; FORCED-NEXT: [[ADD7:%.*]] = add i64 0, 0
; FORCED-NEXT: br label [[BB3:%.*]] ; FORCED-NEXT: br label [[BB3:%.*]]
; FORCED: bb1: ; FORCED: bb1:
; FORCED-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ] ; FORCED-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
@ -36,12 +36,10 @@ define i64 @foo() {
; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3> ; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
; FORCED-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0 ; FORCED-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
; FORCED-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]] ; FORCED-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
; FORCED-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; FORCED-NEXT: [[TMP5]] = add <2 x i64> [[TMP1]], [[TMP2]]
; FORCED-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] ; FORCED-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
; FORCED-NEXT: [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3> ; FORCED-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
; FORCED-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]] ; FORCED-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP8]], 0
; FORCED-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
; FORCED-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
; FORCED-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]] ; FORCED-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
; ;
bb: bb:

View File

@ -9,9 +9,7 @@ define i32 @test() {
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[A_PROMOTED]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[A_PROMOTED]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i16>
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP5]], <i16 0, i16 -1, i16 0, i16 0> ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP5]], <i16 0, i16 -1, i16 0, i16 0>
; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32 ; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32

View File

@ -8,10 +8,8 @@ define i32 @foo() {
; CHECK-NEXT: [[D:%.*]] = load i32, ptr null, align 4 ; CHECK-NEXT: [[D:%.*]] = load i32, ptr null, align 4
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 1, i32 0>, i32 [[D]], i32 1 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 1, i32 0>, i32 [[D]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 1> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]] ; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
; CHECK-NEXT: ret i32 0 ; CHECK-NEXT: ret i32 0
; ;
entry: entry:

View File

@ -4,6 +4,24 @@
; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED ; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
define void @foo() { define void @foo() {
; CHECK-LABEL: define void @foo() {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
; CHECK-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
; CHECK-NEXT: br label [[BB4]]
; CHECK: bb4:
; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]]
; CHECK: bb5:
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ]
; CHECK-NEXT: ret void
;
; FORCED-LABEL: define void @foo() { ; FORCED-LABEL: define void @foo() {
; FORCED-NEXT: bb: ; FORCED-NEXT: bb:
; FORCED-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0 ; FORCED-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
@ -11,9 +29,7 @@ define void @foo() {
; FORCED: bb1: ; FORCED: bb1:
; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ] ; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
; FORCED-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]] ; FORCED-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
; FORCED-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]] ; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
; FORCED-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
; FORCED-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer ; FORCED-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
; FORCED-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 ; FORCED-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
; FORCED-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]]) ; FORCED-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
@ -21,29 +37,9 @@ define void @foo() {
; FORCED: bb4: ; FORCED: bb4:
; FORCED-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]] ; FORCED-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]]
; FORCED: bb5: ; FORCED: bb5:
; FORCED-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ] ; FORCED-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ]
; FORCED-NEXT: ret void ; FORCED-NEXT: ret void
; ;
; CHECK-LABEL: define void @foo() {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP2]], 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SHL]], i32 0
; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
; CHECK-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
; CHECK-NEXT: br label [[BB4]]
; CHECK: bb4:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]]
; CHECK: bb5:
; CHECK-NEXT: [[PHI6:%.*]] = phi i32 [ [[SHL]], [[BB4]] ]
; CHECK-NEXT: [[PHI7:%.*]] = phi i32 [ [[TMP8]], [[BB4]] ]
; CHECK-NEXT: ret void
;
bb: bb:
br label %bb1 br label %bb1

View File

@ -330,9 +330,7 @@ define void @only_arcp(ptr %x) {
define void @addsub_all_nsw(ptr %x) { define void @addsub_all_nsw(ptr %x) {
; CHECK-LABEL: @addsub_all_nsw( ; CHECK-LABEL: @addsub_all_nsw(
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4 ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
@ -361,9 +359,7 @@ define void @addsub_all_nsw(ptr %x) {
define void @addsub_some_nsw(ptr %x) { define void @addsub_some_nsw(ptr %x) {
; CHECK-LABEL: @addsub_some_nsw( ; CHECK-LABEL: @addsub_some_nsw(
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4 ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
@ -392,9 +388,7 @@ define void @addsub_some_nsw(ptr %x) {
define void @addsub_no_nsw(ptr %x) { define void @addsub_no_nsw(ptr %x) {
; CHECK-LABEL: @addsub_no_nsw( ; CHECK-LABEL: @addsub_no_nsw(
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4 ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;

View File

@ -9,16 +9,16 @@ define i32 @test(i1 %cond) {
; CHECK: [[BB]]: ; CHECK: [[BB]]:
; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OR92:%.*]], %[[BB]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OR92:%.*]], %[[BB]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], %[[BB]] ], [ zeroinitializer, %[[ENTRY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], %[[BB]] ], [ zeroinitializer, %[[ENTRY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = or i32 1, 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>, <4 x i32> <i32 poison, i32 1, i32 6, i32 7> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>, <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[P1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[P1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> zeroinitializer, [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> zeroinitializer, [[TMP4]]
; CHECK-NEXT: [[OR92]] = or i32 1, 0 ; CHECK-NEXT: [[OR92]] = or i32 1, 0
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[OR92]], i32 0
; CHECK-NEXT: [[TMP8]] = xor <2 x i32> [[TMP9]], [[TMP7]]
; CHECK-NEXT: [[OP_RDX:%.*]] = xor i32 [[TMP6]], [[OR92]] ; CHECK-NEXT: [[OP_RDX:%.*]] = xor i32 [[TMP6]], [[OR92]]
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[OP_RDX]], i32 0
; CHECK-NEXT: [[TMP8]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[BB]] ; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[BB]]
; CHECK: [[EXIT]]: ; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[OP_RDX]] ; CHECK-NEXT: ret i32 [[OP_RDX]]

View File

@ -14,10 +14,8 @@ define void @test() {
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i16> zeroinitializer, [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = sub <4 x i16> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = sub <4 x i16> zeroinitializer, [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 6, i32 7> ; CHECK-NEXT: [[TMP15:%.*]] = sext <4 x i16> [[TMP13]] to <4 x i32>
; CHECK-NEXT: [[TMP15:%.*]] = sext <4 x i16> [[TMP14]] to <4 x i32>
; CHECK-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP2]], align 16 ; CHECK-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP2]], align 16
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;

View File

@ -6,11 +6,9 @@ define i1 @test() {
; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[H_PROMOTED118_I_FR:%.*]] = freeze i32 1 ; CHECK-NEXT: [[H_PROMOTED118_I_FR:%.*]] = freeze i32 1
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[H_PROMOTED118_I_FR]], i32 2 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[H_PROMOTED118_I_FR]], i32 2
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> zeroinitializer, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> zeroinitializer, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> <i32 2, i32 2, i32 7, i32 2>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP3]], <4 x i32> <i32 2, i32 2, i32 7, i32 2>
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 0, i32 1, i32 1, i32 1> ; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[TMP6]], <i32 1, i32 0, i32 0, i32 0> ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[TMP6]], <i32 1, i32 0, i32 0, i32 0>
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) ; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])

View File

@ -242,13 +242,18 @@ exit:
} }
define void @store_try_reorder(ptr %dst) { define void @store_try_reorder(ptr %dst) {
; CHECK-LABEL: @store_try_reorder( ; NON-POW2-LABEL: @store_try_reorder(
; CHECK-NEXT: entry: ; NON-POW2-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 0, 0 ; NON-POW2-NEXT: store <3 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
; CHECK-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void
; CHECK-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1 ;
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4 ; POW2-ONLY-LABEL: @store_try_reorder(
; CHECK-NEXT: ret void ; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: ret void
; ;
entry: entry:
%add = add i32 0, 0 %add = add i32 0, 0

View File

@ -192,9 +192,7 @@ define void @addsub0(ptr noalias %dst, ptr noalias %src) {
; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2 ; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
; CHECK-NEXT: store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4 ; CHECK-NEXT: store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3> ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[INCDEC_PTR3]], align 4 ; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
@ -225,9 +223,7 @@ define void @addsub1(ptr noalias %dst, ptr noalias %src) {
; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2 ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2 ; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], splat (i32 -1) ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP0]], <i32 -1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], splat (i32 -1)
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[DST]], align 4 ; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[DST]], align 4
; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4

View File

@ -1,18 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} ; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=X86 %}
; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} ; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=AARCH64 %}
define <2 x i32> @test(i32 %arg) { define <2 x i32> @test(i32 %arg) {
; CHECK-LABEL: define <2 x i32> @test( ; X86-LABEL: define <2 x i32> @test(
; CHECK-SAME: i32 [[ARG:%.*]]) { ; X86-SAME: i32 [[ARG:%.*]]) {
; CHECK-NEXT: bb: ; X86-NEXT: bb:
; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0 ; X86-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0
; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 1 ; X86-NEXT: [[MUL:%.*]] = mul i32 0, 1
; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]] ; X86-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]] ; X86-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0 ; X86-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1 ; X86-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; X86-NEXT: ret <2 x i32> [[TMP1]]
;
; AARCH64-LABEL: define <2 x i32> @test(
; AARCH64-SAME: i32 [[ARG:%.*]]) {
; AARCH64-NEXT: bb:
; AARCH64-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[ARG]], i32 0
; AARCH64-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[TMP0]], zeroinitializer
; AARCH64-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
; AARCH64-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; AARCH64-NEXT: [[MUL1:%.*]] = mul i32 [[TMP2]], [[TMP3]]
; AARCH64-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
; AARCH64-NEXT: ret <2 x i32> [[TMP1]]
; ;
bb: bb:
%or = or i32 %arg, 0 %or = or i32 %arg, 0

View File

@ -0,0 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S %s | FileCheck %s
; Scalar chain shl -> mul -> mul feeding a single i16 store to a null pointer.
; The FileCheck assertions below reproduce the input IR instruction for
; instruction, i.e. the SLP vectorizer is expected to leave this function
; unchanged.
; NOTE(review): the shift amount -1 is not smaller than the i16 bit width, so
; %shl22 is poison per the LangRef; presumably this is a reduced reproducer for
; a crash or miscompile in the interchangeable-opcode analysis -- confirm
; against the originating report.
define i128 @f_768_3162(i16 %0) {
; CHECK-LABEL: @f_768_3162(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHL22:%.*]] = shl i16 0, -1
; CHECK-NEXT: [[MUL23:%.*]] = mul i16 0, [[TMP0:%.*]]
; CHECK-NEXT: [[MUL24:%.*]] = mul i16 [[SHL22]], [[MUL23]]
; CHECK-NEXT: store i16 [[MUL24]], ptr null, align 1
; CHECK-NEXT: ret i128 0
;
entry:
%shl22 = shl i16 0, -1
%mul23 = mul i16 0, %0
%mul24 = mul i16 %shl22, %mul23
store i16 %mul24, ptr null, align 1
ret i128 0
}

View File

@ -0,0 +1,61 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -slp-max-reg-size=1024 %s | FileCheck %s
; Four consecutive i32 lanes are loaded from %a, each is transformed with a
; different scalar opcode (shl by 1, add 0, mul by 2, shl by 0), and the
; results are stored to four consecutive i32 slots of %b.
; Every lane can be rewritten as a left shift (x + 0 == x << 0 and
; x * 2 == x << 1), so the assertions expect one <4 x i32> load, a single
; vector shl with shift amounts <1, 0, 1, 0>, and one <4 x i32> store, with no
; alternate opcode and no shufflevector.
define void @test1(ptr %a, ptr %b) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 0
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[GEP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[TMP0]], <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[GEP4]], align 4
; CHECK-NEXT: ret void
;
entry:
%gep0 = getelementptr inbounds i32, ptr %a, i64 0
%gep1 = getelementptr inbounds i32, ptr %a, i64 1
%gep2 = getelementptr inbounds i32, ptr %a, i64 2
%gep3 = getelementptr inbounds i32, ptr %a, i64 3
%0 = load i32, ptr %gep0, align 4
%1 = load i32, ptr %gep1, align 4
%2 = load i32, ptr %gep2, align 4
%3 = load i32, ptr %gep3, align 4
; Lane 0: already a shift by 1.
%op0 = shl i32 %0, 1
; Lane 1: adding zero, same value as shifting by 0.
%op1 = add i32 %1, zeroinitializer
; Lane 2: multiplying by 2, same value as shifting by 1.
%op2 = mul i32 %2, 2
; Lane 3: already a shift by 0.
%op3 = shl i32 %3, zeroinitializer
%gep4 = getelementptr inbounds i32, ptr %b, i64 0
%gep5 = getelementptr inbounds i32, ptr %b, i64 1
%gep6 = getelementptr inbounds i32, ptr %b, i64 2
%gep7 = getelementptr inbounds i32, ptr %b, i64 3
store i32 %op0, ptr %gep4, align 4
store i32 %op1, ptr %gep5, align 4
store i32 %op2, ptr %gep6, align 4
store i32 %op3, ptr %gep7, align 4
ret void
}
; Four i64 values are computed with mul, mul, sub and shl, and each one is used
; only as the index of a getelementptr off a null pointer; nothing is stored or
; returned. The assertions reproduce the input IR instruction for instruction,
; i.e. no vectorization is expected for this bundle.
; NOTE(review): `sub i64 0, %_xstride` has a non-constant right-hand operand,
; which presumably keeps it from being treated as interchangeable with the
; mul/shl lanes -- confirm against the SLP vectorizer implementation.
define void @test2(i64 %_xstride) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL3:%.*]] = mul i64 [[_XSTRIDE:%.*]], 1
; CHECK-NEXT: [[MUL5:%.*]] = mul i64 0, 0
; CHECK-NEXT: [[MUL9:%.*]] = sub i64 0, [[_XSTRIDE]]
; CHECK-NEXT: [[MUL12:%.*]] = shl i64 [[_XSTRIDE]], 1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr null, i64 [[MUL3]]
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr i8, ptr null, i64 [[MUL5]]
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr i8, ptr null, i64 [[MUL9]]
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i8, ptr null, i64 [[MUL12]]
; CHECK-NEXT: ret void
;
entry:
%mul3 = mul i64 %_xstride, 1
%mul5 = mul i64 0, 0
%mul9 = sub i64 0, %_xstride
%mul12 = shl i64 %_xstride, 1
%arrayidx = getelementptr i8, ptr null, i64 %mul3
%arrayidx6 = getelementptr i8, ptr null, i64 %mul5
%arrayidx10 = getelementptr i8, ptr null, i64 %mul9
%arrayidx13 = getelementptr i8, ptr null, i64 %mul12
ret void
}

View File

@ -6,11 +6,9 @@ define void @func(i32 %0) {
; CHECK-SAME: i32 [[TMP0:%.*]]) { ; CHECK-SAME: i32 [[TMP0:%.*]]) {
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 0, i32 0>, i32 [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 0, i32 0>, i32 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP0]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP0]], 0
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <32 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64 ; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], 0
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP9]] to i32

View File

@ -12,9 +12,7 @@ define i32 @test() {
; CHECK-NEXT: br i1 false, label [[BB4:%.*]], label [[BB3]] ; CHECK-NEXT: br i1 false, label [[BB4:%.*]], label [[BB3]]
; CHECK: bb3: ; CHECK: bb3:
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP5]] = add <2 x i32> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[TMP5]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: br label [[BB1]] ; CHECK-NEXT: br label [[BB1]]
; CHECK: bb4: ; CHECK: bb4:
; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i32> [ [[TMP1]], [[BB1]] ] ; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i32> [ [[TMP1]], [[BB1]] ]