[VectorCombine] Remove dead node immediately in VectorCombine (#149047)

The vector combiner will process all instructions as it first loops
through the function, adding any newly added and deleted instructions to
a worklist which is then processed when all nodes are done. This leaves
extra uses in the graph as the initial processing is performed, leading
to sub-optimal decisions being made for other combines. This changes it
so that trivially dead instructions are removed immediately. The main
change that this requires is to make sure iterator invalidation does not
occur.
This commit is contained in:
David Green 2025-08-18 07:55:21 +01:00 committed by GitHub
parent 6957e44d8e
commit 790bee99de
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 160 additions and 154 deletions

View File

@ -111,10 +111,8 @@ private:
const Instruction &I,
ExtractElementInst *&ConvertToShuffle,
unsigned PreferredExtractIndex);
void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
Instruction &I);
void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
Instruction &I);
Value *foldExtExtCmp(Value *V0, Value *V1, Value *ExtIndex, Instruction &I);
Value *foldExtExtBinop(Value *V0, Value *V1, Value *ExtIndex, Instruction &I);
bool foldExtractExtract(Instruction &I);
bool foldInsExtFNeg(Instruction &I);
bool foldInsExtBinop(Instruction &I);
@ -144,7 +142,7 @@ private:
bool shrinkLoadForShuffles(Instruction &I);
bool shrinkPhiOfShuffles(Instruction &I);
void replaceValue(Value &Old, Value &New) {
void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n');
LLVM_DEBUG(dbgs() << " With: " << New << '\n');
Old.replaceAllUsesWith(&New);
@ -153,7 +151,11 @@ private:
Worklist.pushUsersToWorkList(*NewI);
Worklist.pushValue(NewI);
}
Worklist.pushValue(&Old);
if (Erase && isInstructionTriviallyDead(&Old)) {
eraseInstruction(Old);
} else {
Worklist.push(&Old);
}
}
void eraseInstruction(Instruction &I) {
@ -164,11 +166,23 @@ private:
// Push remaining users of the operands and then the operand itself - allows
// further folds that were hindered by OneUse limits.
for (Value *Op : Ops)
if (auto *OpI = dyn_cast<Instruction>(Op)) {
Worklist.pushUsersToWorkList(*OpI);
Worklist.pushValue(OpI);
SmallPtrSet<Value *, 4> Visited;
for (Value *Op : Ops) {
if (Visited.insert(Op).second) {
if (auto *OpI = dyn_cast<Instruction>(Op)) {
if (RecursivelyDeleteTriviallyDeadInstructions(
OpI, nullptr, nullptr, [this](Value *V) {
if (auto I = dyn_cast<Instruction>(V)) {
LLVM_DEBUG(dbgs() << "VC: Erased: " << *I << '\n');
Worklist.remove(I);
}
}))
continue;
Worklist.pushUsersToWorkList(*OpI);
Worklist.pushValue(OpI);
}
}
}
}
};
} // namespace
@ -552,9 +566,8 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
/// the source vector (shift the scalar element) to a NewIndex for extraction.
/// Return null if the input can be constant folded, so that we are not creating
/// unnecessary instructions.
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
unsigned NewIndex,
IRBuilderBase &Builder) {
static Value *translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex,
IRBuilderBase &Builder) {
// Shufflevectors can only be created for fixed-width vectors.
Value *X = ExtElt->getVectorOperand();
if (!isa<FixedVectorType>(X->getType()))
@ -569,52 +582,43 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
NewIndex, Builder);
return dyn_cast<ExtractElementInst>(
Builder.CreateExtractElement(Shuf, NewIndex));
return Shuf;
}
/// Try to reduce extract element costs by converting scalar compares to vector
/// compares followed by extract.
/// cmp (ext0 V0, C), (ext1 V1, C)
void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
ExtractElementInst *Ext1, Instruction &I) {
/// cmp (ext0 V0, ExtIndex), (ext1 V1, ExtIndex)
Value *VectorCombine::foldExtExtCmp(Value *V0, Value *V1, Value *ExtIndex,
Instruction &I) {
assert(isa<CmpInst>(&I) && "Expected a compare");
assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
"Expected matching constant extract indexes");
// cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
// cmp Pred (extelt V0, ExtIndex), (extelt V1, ExtIndex)
// --> extelt (cmp Pred V0, V1), ExtIndex
++NumVecCmp;
CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
replaceValue(I, *NewExt);
return Builder.CreateExtractElement(VecCmp, ExtIndex, "foldExtExtCmp");
}
/// Try to reduce extract element costs by converting scalar binops to vector
/// binops followed by extract.
/// bo (ext0 V0, C), (ext1 V1, C)
void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
ExtractElementInst *Ext1, Instruction &I) {
/// bo (ext0 V0, ExtIndex), (ext1 V1, ExtIndex)
Value *VectorCombine::foldExtExtBinop(Value *V0, Value *V1, Value *ExtIndex,
Instruction &I) {
assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
"Expected matching constant extract indexes");
// bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
// bo (extelt V0, ExtIndex), (extelt V1, ExtIndex)
// --> extelt (bo V0, V1), ExtIndex
++NumVecBO;
Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
Value *VecBO =
Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
Value *VecBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0,
V1, "foldExtExtBinop");
// All IR flags are safe to back-propagate because any potential poison
// created in unused vector elements is discarded by the extract.
if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
VecBOInst->copyIRFlags(&I);
Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
replaceValue(I, *NewExt);
return Builder.CreateExtractElement(VecBO, ExtIndex, "foldExtExtBinop");
}
/// Match an instruction with extracted vector operands.
@ -653,25 +657,29 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
return false;
Value *ExtOp0 = Ext0->getVectorOperand();
Value *ExtOp1 = Ext1->getVectorOperand();
if (ExtractToChange) {
unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
ExtractElementInst *NewExtract =
Value *NewExtOp =
translateExtract(ExtractToChange, CheapExtractIdx, Builder);
if (!NewExtract)
if (!NewExtOp)
return false;
if (ExtractToChange == Ext0)
Ext0 = NewExtract;
ExtOp0 = NewExtOp;
else
Ext1 = NewExtract;
ExtOp1 = NewExtOp;
}
if (Pred != CmpInst::BAD_ICMP_PREDICATE)
foldExtExtCmp(Ext0, Ext1, I);
else
foldExtExtBinop(Ext0, Ext1, I);
Value *ExtIndex = ExtractToChange == Ext0 ? Ext1->getIndexOperand()
: Ext0->getIndexOperand();
Value *NewExt = Pred != CmpInst::BAD_ICMP_PREDICATE
? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
: foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
Worklist.push(Ext0);
Worklist.push(Ext1);
replaceValue(I, *NewExt);
return true;
}
@ -1824,7 +1832,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
LI->getAlign(), VecTy->getElementType(), Idx, *DL);
NewLoad->setAlignment(ScalarOpAlignment);
replaceValue(*EI, *NewLoad);
replaceValue(*EI, *NewLoad, false);
}
FailureGuard.release();
@ -3112,7 +3120,7 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask);
LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
replaceValue(*Shuffle, *NewShuffle);
MadeChanges = true;
return true;
}
// See if we can re-use foldSelectShuffle, getting it to reduce the size of
@ -3608,7 +3616,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
Builder.SetInsertPoint(Shuffles[S]);
Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
replaceValue(*Shuffles[S], *NSV);
replaceValue(*Shuffles[S], *NSV, false);
}
Worklist.pushValue(NSV0A);
@ -3979,7 +3987,7 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
Value *NewShuffle = Builder.CreateShuffleVector(
NewLoad, PoisonValue::get(NewLoadTy), NewMask);
replaceValue(*Shuffle, *NewShuffle);
replaceValue(*Shuffle, *NewShuffle, false);
}
return true;
@ -4095,8 +4103,7 @@ bool VectorCombine::run() {
LLVM_DEBUG(dbgs() << "\n\nVECTORCOMBINE on " << F.getName() << "\n");
bool MadeChange = false;
auto FoldInst = [this, &MadeChange](Instruction &I) {
auto FoldInst = [this](Instruction &I) {
Builder.SetInsertPoint(&I);
bool IsVectorType = isa<VectorType>(I.getType());
bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
@ -4111,10 +4118,12 @@ bool VectorCombine::run() {
if (IsFixedVectorType) {
switch (Opcode) {
case Instruction::InsertElement:
MadeChange |= vectorizeLoadInsert(I);
if (vectorizeLoadInsert(I))
return true;
break;
case Instruction::ShuffleVector:
MadeChange |= widenSubvectorLoad(I);
if (widenSubvectorLoad(I))
return true;
break;
default:
break;
@ -4124,19 +4133,25 @@ bool VectorCombine::run() {
// This transform works with scalable and fixed vectors
// TODO: Identify and allow other scalable transforms
if (IsVectorType) {
MadeChange |= scalarizeOpOrCmp(I);
MadeChange |= scalarizeLoadExtract(I);
MadeChange |= scalarizeExtExtract(I);
MadeChange |= scalarizeVPIntrinsic(I);
MadeChange |= foldInterleaveIntrinsics(I);
if (scalarizeOpOrCmp(I))
return true;
if (scalarizeLoadExtract(I))
return true;
if (scalarizeExtExtract(I))
return true;
if (scalarizeVPIntrinsic(I))
return true;
if (foldInterleaveIntrinsics(I))
return true;
}
if (Opcode == Instruction::Store)
MadeChange |= foldSingleElementStore(I);
if (foldSingleElementStore(I))
return true;
// If this is an early pipeline invocation of this pass, we are done.
if (TryEarlyFoldsOnly)
return;
return false;
// Otherwise, try folds that improve codegen but may interfere with
// early IR canonicalizations.
@ -4145,62 +4160,87 @@ bool VectorCombine::run() {
if (IsFixedVectorType) {
switch (Opcode) {
case Instruction::InsertElement:
MadeChange |= foldInsExtFNeg(I);
MadeChange |= foldInsExtBinop(I);
MadeChange |= foldInsExtVectorToShuffle(I);
if (foldInsExtFNeg(I))
return true;
if (foldInsExtBinop(I))
return true;
if (foldInsExtVectorToShuffle(I))
return true;
break;
case Instruction::ShuffleVector:
MadeChange |= foldPermuteOfBinops(I);
MadeChange |= foldShuffleOfBinops(I);
MadeChange |= foldShuffleOfSelects(I);
MadeChange |= foldShuffleOfCastops(I);
MadeChange |= foldShuffleOfShuffles(I);
MadeChange |= foldShuffleOfIntrinsics(I);
MadeChange |= foldSelectShuffle(I);
MadeChange |= foldShuffleToIdentity(I);
if (foldPermuteOfBinops(I))
return true;
if (foldShuffleOfBinops(I))
return true;
if (foldShuffleOfSelects(I))
return true;
if (foldShuffleOfCastops(I))
return true;
if (foldShuffleOfShuffles(I))
return true;
if (foldShuffleOfIntrinsics(I))
return true;
if (foldSelectShuffle(I))
return true;
if (foldShuffleToIdentity(I))
return true;
break;
case Instruction::Load:
MadeChange |= shrinkLoadForShuffles(I);
if (shrinkLoadForShuffles(I))
return true;
break;
case Instruction::BitCast:
MadeChange |= foldBitcastShuffle(I);
if (foldBitcastShuffle(I))
return true;
break;
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
MadeChange |= foldBitOpOfCastops(I);
if (foldBitOpOfCastops(I))
return true;
break;
case Instruction::PHI:
MadeChange |= shrinkPhiOfShuffles(I);
if (shrinkPhiOfShuffles(I))
return true;
break;
default:
MadeChange |= shrinkType(I);
if (shrinkType(I))
return true;
break;
}
} else {
switch (Opcode) {
case Instruction::Call:
MadeChange |= foldShuffleFromReductions(I);
MadeChange |= foldCastFromReductions(I);
if (foldShuffleFromReductions(I))
return true;
if (foldCastFromReductions(I))
return true;
break;
case Instruction::ICmp:
case Instruction::FCmp:
MadeChange |= foldExtractExtract(I);
if (foldExtractExtract(I))
return true;
break;
case Instruction::Or:
MadeChange |= foldConcatOfBoolMasks(I);
if (foldConcatOfBoolMasks(I))
return true;
[[fallthrough]];
default:
if (Instruction::isBinaryOp(Opcode)) {
MadeChange |= foldExtractExtract(I);
MadeChange |= foldExtractedCmps(I);
MadeChange |= foldBinopOfReductions(I);
if (foldExtractExtract(I))
return true;
if (foldExtractedCmps(I))
return true;
if (foldBinopOfReductions(I))
return true;
}
break;
}
}
return false;
};
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
if (!DT.isReachableFromEntry(&BB))
@ -4209,7 +4249,7 @@ bool VectorCombine::run() {
for (Instruction &I : make_early_inc_range(BB)) {
if (I.isDebugOrPseudoInst())
continue;
FoldInst(I);
MadeChange |= FoldInst(I);
}
}
@ -4223,7 +4263,7 @@ bool VectorCombine::run() {
continue;
}
FoldInst(*I);
MadeChange |= FoldInst(*I);
}
return MadeChange;

View File

@ -926,53 +926,17 @@ define void @same_op8_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[C]], align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
; CHECK-NEXT: [[STRIDED_VEC12:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
; CHECK-NEXT: [[STRIDED_VEC13:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
; CHECK-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
; CHECK-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
; CHECK-NEXT: [[STRIDED_VEC16:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
; CHECK-NEXT: [[STRIDED_VEC17:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
; CHECK-NEXT: [[STRIDED_VEC18:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_VEC19:%.*]] = load <16 x float>, ptr [[TMP6]], align 4
; CHECK-NEXT: [[STRIDED_VEC20:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
; CHECK-NEXT: [[STRIDED_VEC21:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
; CHECK-NEXT: [[STRIDED_VEC22:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
; CHECK-NEXT: [[STRIDED_VEC23:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
; CHECK-NEXT: [[STRIDED_VEC24:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
; CHECK-NEXT: [[STRIDED_VEC25:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
; CHECK-NEXT: [[STRIDED_VEC26:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
; CHECK-NEXT: [[STRIDED_VEC27:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[STRIDED_VEC20]], <2 x float> [[STRIDED_VEC21]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[STRIDED_VEC]], <2 x float> [[STRIDED_VEC12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[TMP8]], [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[TMP7]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[STRIDED_VEC22]], <2 x float> [[STRIDED_VEC23]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[STRIDED_VEC13]], <2 x float> [[STRIDED_VEC14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <4 x float> [[TMP12]], [[TMP2]]
; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x float> [[STRIDED_VEC24]], <2 x float> [[STRIDED_VEC25]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[STRIDED_VEC15]], <2 x float> [[STRIDED_VEC16]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP17:%.*]] = fmul fast <4 x float> [[TMP16]], [[TMP3]]
; CHECK-NEXT: [[TMP18:%.*]] = fadd fast <4 x float> [[TMP15]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x float> [[STRIDED_VEC26]], <2 x float> [[STRIDED_VEC27]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[STRIDED_VEC17]], <2 x float> [[STRIDED_VEC18]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP21:%.*]] = fmul fast <4 x float> [[TMP20]], [[TMP4]]
; CHECK-NEXT: [[TMP22:%.*]] = fadd fast <4 x float> [[TMP19]], [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP23]], <8 x float> [[TMP24]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[TMP1]]
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x float> [[WIDE_VEC19]], [[TMP4]]
; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 144

View File

@ -404,13 +404,13 @@ define <16 x i16> @add_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
; SSE4-LABEL: @add_v16i16_FEuCBA98765432u0(
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 poison, i32 11, i32 12, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 26, i32 29, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 11, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 26, i32 29, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP9:%.*]] = add <16 x i16> [[TMP7]], [[TMP8]]
; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 3, i32 2, i32 poison, i32 1, i32 0, i32 27, i32 26, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
; SSE4-NEXT: ret <16 x i16> [[RESULT]]
;
; AVX2-LABEL: @add_v16i16_FEuCBA98765432u0(

View File

@ -398,13 +398,13 @@ define <16 x i16> @sub_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
; SSE4-LABEL: @sub_v16i16_FEuCBA98765432u0(
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 poison, i32 10, i32 12, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 27, i32 29, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 27, i32 29, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP9:%.*]] = sub <16 x i16> [[TMP7]], [[TMP8]]
; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 3, i32 2, i32 poison, i32 1, i32 0, i32 27, i32 26, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
; SSE4-NEXT: ret <16 x i16> [[RESULT]]
;
; AVX2-LABEL: @sub_v16i16_FEuCBA98765432u0(

View File

@ -268,7 +268,7 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext5_ext0_add(
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
; CHECK-NEXT: ret i8 [[R]]
;
%e0 = extractelement <16 x i8> %x, i32 5
@ -294,7 +294,7 @@ define float @ext1_ext0_fmul(<4 x float> %x) {
; CHECK-LABEL: @ext1_ext0_fmul(
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]]
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: ret float [[R]]
;
%e0 = extractelement <4 x float> %x, i32 1
@ -363,7 +363,7 @@ define float @ext7_ext4_fmul_v8f32(<8 x float> %x) {
; AVX-LABEL: @ext7_ext4_fmul_v8f32(
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison>
; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]]
; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4
; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
; AVX-NEXT: ret float [[R]]
;
%e0 = extractelement <8 x float> %x, i32 7
@ -484,7 +484,7 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]]
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]]
; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; CHECK-NEXT: ret i32 [[Z0123]]
;
%z = and <4 x i32> %x, %y
@ -504,7 +504,7 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]]
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
; CHECK-NEXT: ret i32 [[X210]]
;
%x0 = extractelement <4 x i32> %x, i32 0
@ -523,7 +523,7 @@ define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]]
; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; CHECK-NEXT: ret i32 [[X2Y210]]
;
%y0 = extractelement <4 x i32> %y, i32 0

View File

@ -268,7 +268,7 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext5_ext0_add(
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
; CHECK-NEXT: ret i8 [[R]]
;
%e0 = extractelement <16 x i8> %x, i32 5
@ -294,7 +294,7 @@ define float @ext1_ext0_fmul(<4 x float> %x) {
; CHECK-LABEL: @ext1_ext0_fmul(
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]]
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: ret float [[R]]
;
%e0 = extractelement <4 x float> %x, i32 1
@ -363,7 +363,7 @@ define float @ext7_ext4_fmul_v8f32(<8 x float> %x) {
; AVX-LABEL: @ext7_ext4_fmul_v8f32(
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison>
; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]]
; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4
; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
; AVX-NEXT: ret float [[R]]
;
%e0 = extractelement <8 x float> %x, i32 7
@ -490,7 +490,7 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]]
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]]
; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; CHECK-NEXT: ret i32 [[Z0123]]
;
%z = and <4 x i32> %x, %y
@ -510,7 +510,7 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]]
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
; CHECK-NEXT: ret i32 [[X210]]
;
%x0 = extractelement <4 x i32> %x, i32 0
@ -529,7 +529,7 @@ define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]]
; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; CHECK-NEXT: ret i32 [[X2Y210]]
;
%y0 = extractelement <4 x i32> %y, i32 0
@ -573,10 +573,8 @@ define i64 @instsimplify_folder_crash(<4 x i64> %in) {
; CHECK-LABEL: @instsimplify_folder_crash(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE_1:%.*]] = shufflevector <4 x i64> [[IN:%.*]], <4 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i64> zeroinitializer, i64 0
; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i64> [[SHUFFLE_1]], i64 1
; CHECK-NEXT: [[OR:%.*]] = or i64 [[E_1]], [[E_0]]
; CHECK-NEXT: ret i64 [[OR]]
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i64> [[SHUFFLE_1]], <4 x i64> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: ret i64 0
;
entry:
%shuffle.1 = shufflevector <4 x i64> %in, <4 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 2, i32 3>

View File

@ -130,7 +130,7 @@ define i1 @cmp10_v2f64(<2 x double> %x, <2 x double> %y) {
; AVX-LABEL: @cmp10_v2f64(
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; AVX-NEXT: [[TMP1:%.*]] = fcmp ule <2 x double> [[SHIFT]], [[Y:%.*]]
; AVX-NEXT: [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
; AVX-NEXT: [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; AVX-NEXT: ret i1 [[CMP]]
;
%x1 = extractelement <2 x double> %x, i32 1

View File

@ -27,6 +27,8 @@ define void @multiple_extract(ptr %p) {
; infinite loop if we fold an extract that is waiting to be erased
define void @unused_extract(ptr %p) {
; CHECK-LABEL: @unused_extract(
; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[LOAD]], i64 1
; CHECK-NEXT: ret void
;
%load = load <4 x float>, ptr %p, align 8

View File

@ -253,7 +253,8 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
; CHECK-NEXT: ret <8 x i16> [[R]]
; CHECK-NEXT: [[R1:%.*]] = shufflevector <8 x i16> [[R]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: ret <8 x i16> [[R1]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
%s = load i16, ptr %gep, align 2
@ -341,7 +342,8 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %
; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
; CHECK-NEXT: ret <8 x i16> [[R]]
; CHECK-NEXT: [[R1:%.*]] = shufflevector <8 x i16> [[R]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: ret <8 x i16> [[R1]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
%s = load i16, ptr %gep, align 16