[VectorCombine] Remove dead node immediately in VectorCombine (#149047)
The vector combiner will process all instructions as it first loops through the function, adding any newly added and deleted instructions to a worklist which is then processed when all nodes are done. This leaves extra uses in the graph as the initial processing is performed, leading to sub-optimal decisions being made for other combines. This patch changes the behavior so that trivially dead instructions are removed immediately. The main change that this requires is to make sure iterator invalidation does not occur.
This commit is contained in:
parent
6957e44d8e
commit
790bee99de
@ -111,10 +111,8 @@ private:
|
||||
const Instruction &I,
|
||||
ExtractElementInst *&ConvertToShuffle,
|
||||
unsigned PreferredExtractIndex);
|
||||
void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
|
||||
Instruction &I);
|
||||
void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
|
||||
Instruction &I);
|
||||
Value *foldExtExtCmp(Value *V0, Value *V1, Value *ExtIndex, Instruction &I);
|
||||
Value *foldExtExtBinop(Value *V0, Value *V1, Value *ExtIndex, Instruction &I);
|
||||
bool foldExtractExtract(Instruction &I);
|
||||
bool foldInsExtFNeg(Instruction &I);
|
||||
bool foldInsExtBinop(Instruction &I);
|
||||
@ -144,7 +142,7 @@ private:
|
||||
bool shrinkLoadForShuffles(Instruction &I);
|
||||
bool shrinkPhiOfShuffles(Instruction &I);
|
||||
|
||||
void replaceValue(Value &Old, Value &New) {
|
||||
void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
|
||||
LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n');
|
||||
LLVM_DEBUG(dbgs() << " With: " << New << '\n');
|
||||
Old.replaceAllUsesWith(&New);
|
||||
@ -153,7 +151,11 @@ private:
|
||||
Worklist.pushUsersToWorkList(*NewI);
|
||||
Worklist.pushValue(NewI);
|
||||
}
|
||||
Worklist.pushValue(&Old);
|
||||
if (Erase && isInstructionTriviallyDead(&Old)) {
|
||||
eraseInstruction(Old);
|
||||
} else {
|
||||
Worklist.push(&Old);
|
||||
}
|
||||
}
|
||||
|
||||
void eraseInstruction(Instruction &I) {
|
||||
@ -164,11 +166,23 @@ private:
|
||||
|
||||
// Push remaining users of the operands and then the operand itself - allows
|
||||
// further folds that were hindered by OneUse limits.
|
||||
for (Value *Op : Ops)
|
||||
if (auto *OpI = dyn_cast<Instruction>(Op)) {
|
||||
Worklist.pushUsersToWorkList(*OpI);
|
||||
Worklist.pushValue(OpI);
|
||||
SmallPtrSet<Value *, 4> Visited;
|
||||
for (Value *Op : Ops) {
|
||||
if (Visited.insert(Op).second) {
|
||||
if (auto *OpI = dyn_cast<Instruction>(Op)) {
|
||||
if (RecursivelyDeleteTriviallyDeadInstructions(
|
||||
OpI, nullptr, nullptr, [this](Value *V) {
|
||||
if (auto I = dyn_cast<Instruction>(V)) {
|
||||
LLVM_DEBUG(dbgs() << "VC: Erased: " << *I << '\n');
|
||||
Worklist.remove(I);
|
||||
}
|
||||
}))
|
||||
continue;
|
||||
Worklist.pushUsersToWorkList(*OpI);
|
||||
Worklist.pushValue(OpI);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
@ -552,9 +566,8 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
|
||||
/// the source vector (shift the scalar element) to a NewIndex for extraction.
|
||||
/// Return null if the input can be constant folded, so that we are not creating
|
||||
/// unnecessary instructions.
|
||||
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
|
||||
unsigned NewIndex,
|
||||
IRBuilderBase &Builder) {
|
||||
static Value *translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex,
|
||||
IRBuilderBase &Builder) {
|
||||
// Shufflevectors can only be created for fixed-width vectors.
|
||||
Value *X = ExtElt->getVectorOperand();
|
||||
if (!isa<FixedVectorType>(X->getType()))
|
||||
@ -569,52 +582,43 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
|
||||
|
||||
Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
|
||||
NewIndex, Builder);
|
||||
return dyn_cast<ExtractElementInst>(
|
||||
Builder.CreateExtractElement(Shuf, NewIndex));
|
||||
return Shuf;
|
||||
}
|
||||
|
||||
/// Try to reduce extract element costs by converting scalar compares to vector
|
||||
/// compares followed by extract.
|
||||
/// cmp (ext0 V0, C), (ext1 V1, C)
|
||||
void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
|
||||
ExtractElementInst *Ext1, Instruction &I) {
|
||||
/// cmp (ext0 V0, ExtIndex), (ext1 V1, ExtIndex)
|
||||
Value *VectorCombine::foldExtExtCmp(Value *V0, Value *V1, Value *ExtIndex,
|
||||
Instruction &I) {
|
||||
assert(isa<CmpInst>(&I) && "Expected a compare");
|
||||
assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
|
||||
cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
|
||||
"Expected matching constant extract indexes");
|
||||
|
||||
// cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
|
||||
// cmp Pred (extelt V0, ExtIndex), (extelt V1, ExtIndex)
|
||||
// --> extelt (cmp Pred V0, V1), ExtIndex
|
||||
++NumVecCmp;
|
||||
CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
|
||||
Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
|
||||
Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
|
||||
Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
|
||||
replaceValue(I, *NewExt);
|
||||
return Builder.CreateExtractElement(VecCmp, ExtIndex, "foldExtExtCmp");
|
||||
}
|
||||
|
||||
/// Try to reduce extract element costs by converting scalar binops to vector
|
||||
/// binops followed by extract.
|
||||
/// bo (ext0 V0, C), (ext1 V1, C)
|
||||
void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
|
||||
ExtractElementInst *Ext1, Instruction &I) {
|
||||
/// bo (ext0 V0, ExtIndex), (ext1 V1, ExtIndex)
|
||||
Value *VectorCombine::foldExtExtBinop(Value *V0, Value *V1, Value *ExtIndex,
|
||||
Instruction &I) {
|
||||
assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
|
||||
assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
|
||||
cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
|
||||
"Expected matching constant extract indexes");
|
||||
|
||||
// bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
|
||||
// bo (extelt V0, ExtIndex), (extelt V1, ExtIndex)
|
||||
// --> extelt (bo V0, V1), ExtIndex
|
||||
++NumVecBO;
|
||||
Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
|
||||
Value *VecBO =
|
||||
Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
|
||||
Value *VecBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0,
|
||||
V1, "foldExtExtBinop");
|
||||
|
||||
// All IR flags are safe to back-propagate because any potential poison
|
||||
// created in unused vector elements is discarded by the extract.
|
||||
if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
|
||||
VecBOInst->copyIRFlags(&I);
|
||||
|
||||
Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
|
||||
replaceValue(I, *NewExt);
|
||||
return Builder.CreateExtractElement(VecBO, ExtIndex, "foldExtExtBinop");
|
||||
}
|
||||
|
||||
/// Match an instruction with extracted vector operands.
|
||||
@ -653,25 +657,29 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
|
||||
if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
|
||||
return false;
|
||||
|
||||
Value *ExtOp0 = Ext0->getVectorOperand();
|
||||
Value *ExtOp1 = Ext1->getVectorOperand();
|
||||
|
||||
if (ExtractToChange) {
|
||||
unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
|
||||
ExtractElementInst *NewExtract =
|
||||
Value *NewExtOp =
|
||||
translateExtract(ExtractToChange, CheapExtractIdx, Builder);
|
||||
if (!NewExtract)
|
||||
if (!NewExtOp)
|
||||
return false;
|
||||
if (ExtractToChange == Ext0)
|
||||
Ext0 = NewExtract;
|
||||
ExtOp0 = NewExtOp;
|
||||
else
|
||||
Ext1 = NewExtract;
|
||||
ExtOp1 = NewExtOp;
|
||||
}
|
||||
|
||||
if (Pred != CmpInst::BAD_ICMP_PREDICATE)
|
||||
foldExtExtCmp(Ext0, Ext1, I);
|
||||
else
|
||||
foldExtExtBinop(Ext0, Ext1, I);
|
||||
|
||||
Value *ExtIndex = ExtractToChange == Ext0 ? Ext1->getIndexOperand()
|
||||
: Ext0->getIndexOperand();
|
||||
Value *NewExt = Pred != CmpInst::BAD_ICMP_PREDICATE
|
||||
? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
|
||||
: foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
|
||||
Worklist.push(Ext0);
|
||||
Worklist.push(Ext1);
|
||||
replaceValue(I, *NewExt);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1824,7 +1832,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
|
||||
LI->getAlign(), VecTy->getElementType(), Idx, *DL);
|
||||
NewLoad->setAlignment(ScalarOpAlignment);
|
||||
|
||||
replaceValue(*EI, *NewLoad);
|
||||
replaceValue(*EI, *NewLoad, false);
|
||||
}
|
||||
|
||||
FailureGuard.release();
|
||||
@ -3112,7 +3120,7 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
|
||||
Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask);
|
||||
LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
|
||||
replaceValue(*Shuffle, *NewShuffle);
|
||||
MadeChanges = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// See if we can re-use foldSelectShuffle, getting it to reduce the size of
|
||||
@ -3608,7 +3616,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
|
||||
for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
|
||||
Builder.SetInsertPoint(Shuffles[S]);
|
||||
Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
|
||||
replaceValue(*Shuffles[S], *NSV);
|
||||
replaceValue(*Shuffles[S], *NSV, false);
|
||||
}
|
||||
|
||||
Worklist.pushValue(NSV0A);
|
||||
@ -3979,7 +3987,7 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
|
||||
Value *NewShuffle = Builder.CreateShuffleVector(
|
||||
NewLoad, PoisonValue::get(NewLoadTy), NewMask);
|
||||
|
||||
replaceValue(*Shuffle, *NewShuffle);
|
||||
replaceValue(*Shuffle, *NewShuffle, false);
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -4095,8 +4103,7 @@ bool VectorCombine::run() {
|
||||
|
||||
LLVM_DEBUG(dbgs() << "\n\nVECTORCOMBINE on " << F.getName() << "\n");
|
||||
|
||||
bool MadeChange = false;
|
||||
auto FoldInst = [this, &MadeChange](Instruction &I) {
|
||||
auto FoldInst = [this](Instruction &I) {
|
||||
Builder.SetInsertPoint(&I);
|
||||
bool IsVectorType = isa<VectorType>(I.getType());
|
||||
bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
|
||||
@ -4111,10 +4118,12 @@ bool VectorCombine::run() {
|
||||
if (IsFixedVectorType) {
|
||||
switch (Opcode) {
|
||||
case Instruction::InsertElement:
|
||||
MadeChange |= vectorizeLoadInsert(I);
|
||||
if (vectorizeLoadInsert(I))
|
||||
return true;
|
||||
break;
|
||||
case Instruction::ShuffleVector:
|
||||
MadeChange |= widenSubvectorLoad(I);
|
||||
if (widenSubvectorLoad(I))
|
||||
return true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@ -4124,19 +4133,25 @@ bool VectorCombine::run() {
|
||||
// This transform works with scalable and fixed vectors
|
||||
// TODO: Identify and allow other scalable transforms
|
||||
if (IsVectorType) {
|
||||
MadeChange |= scalarizeOpOrCmp(I);
|
||||
MadeChange |= scalarizeLoadExtract(I);
|
||||
MadeChange |= scalarizeExtExtract(I);
|
||||
MadeChange |= scalarizeVPIntrinsic(I);
|
||||
MadeChange |= foldInterleaveIntrinsics(I);
|
||||
if (scalarizeOpOrCmp(I))
|
||||
return true;
|
||||
if (scalarizeLoadExtract(I))
|
||||
return true;
|
||||
if (scalarizeExtExtract(I))
|
||||
return true;
|
||||
if (scalarizeVPIntrinsic(I))
|
||||
return true;
|
||||
if (foldInterleaveIntrinsics(I))
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Opcode == Instruction::Store)
|
||||
MadeChange |= foldSingleElementStore(I);
|
||||
if (foldSingleElementStore(I))
|
||||
return true;
|
||||
|
||||
// If this is an early pipeline invocation of this pass, we are done.
|
||||
if (TryEarlyFoldsOnly)
|
||||
return;
|
||||
return false;
|
||||
|
||||
// Otherwise, try folds that improve codegen but may interfere with
|
||||
// early IR canonicalizations.
|
||||
@ -4145,62 +4160,87 @@ bool VectorCombine::run() {
|
||||
if (IsFixedVectorType) {
|
||||
switch (Opcode) {
|
||||
case Instruction::InsertElement:
|
||||
MadeChange |= foldInsExtFNeg(I);
|
||||
MadeChange |= foldInsExtBinop(I);
|
||||
MadeChange |= foldInsExtVectorToShuffle(I);
|
||||
if (foldInsExtFNeg(I))
|
||||
return true;
|
||||
if (foldInsExtBinop(I))
|
||||
return true;
|
||||
if (foldInsExtVectorToShuffle(I))
|
||||
return true;
|
||||
break;
|
||||
case Instruction::ShuffleVector:
|
||||
MadeChange |= foldPermuteOfBinops(I);
|
||||
MadeChange |= foldShuffleOfBinops(I);
|
||||
MadeChange |= foldShuffleOfSelects(I);
|
||||
MadeChange |= foldShuffleOfCastops(I);
|
||||
MadeChange |= foldShuffleOfShuffles(I);
|
||||
MadeChange |= foldShuffleOfIntrinsics(I);
|
||||
MadeChange |= foldSelectShuffle(I);
|
||||
MadeChange |= foldShuffleToIdentity(I);
|
||||
if (foldPermuteOfBinops(I))
|
||||
return true;
|
||||
if (foldShuffleOfBinops(I))
|
||||
return true;
|
||||
if (foldShuffleOfSelects(I))
|
||||
return true;
|
||||
if (foldShuffleOfCastops(I))
|
||||
return true;
|
||||
if (foldShuffleOfShuffles(I))
|
||||
return true;
|
||||
if (foldShuffleOfIntrinsics(I))
|
||||
return true;
|
||||
if (foldSelectShuffle(I))
|
||||
return true;
|
||||
if (foldShuffleToIdentity(I))
|
||||
return true;
|
||||
break;
|
||||
case Instruction::Load:
|
||||
MadeChange |= shrinkLoadForShuffles(I);
|
||||
if (shrinkLoadForShuffles(I))
|
||||
return true;
|
||||
break;
|
||||
case Instruction::BitCast:
|
||||
MadeChange |= foldBitcastShuffle(I);
|
||||
if (foldBitcastShuffle(I))
|
||||
return true;
|
||||
break;
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor:
|
||||
MadeChange |= foldBitOpOfCastops(I);
|
||||
if (foldBitOpOfCastops(I))
|
||||
return true;
|
||||
break;
|
||||
case Instruction::PHI:
|
||||
MadeChange |= shrinkPhiOfShuffles(I);
|
||||
if (shrinkPhiOfShuffles(I))
|
||||
return true;
|
||||
break;
|
||||
default:
|
||||
MadeChange |= shrinkType(I);
|
||||
if (shrinkType(I))
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (Opcode) {
|
||||
case Instruction::Call:
|
||||
MadeChange |= foldShuffleFromReductions(I);
|
||||
MadeChange |= foldCastFromReductions(I);
|
||||
if (foldShuffleFromReductions(I))
|
||||
return true;
|
||||
if (foldCastFromReductions(I))
|
||||
return true;
|
||||
break;
|
||||
case Instruction::ICmp:
|
||||
case Instruction::FCmp:
|
||||
MadeChange |= foldExtractExtract(I);
|
||||
if (foldExtractExtract(I))
|
||||
return true;
|
||||
break;
|
||||
case Instruction::Or:
|
||||
MadeChange |= foldConcatOfBoolMasks(I);
|
||||
if (foldConcatOfBoolMasks(I))
|
||||
return true;
|
||||
[[fallthrough]];
|
||||
default:
|
||||
if (Instruction::isBinaryOp(Opcode)) {
|
||||
MadeChange |= foldExtractExtract(I);
|
||||
MadeChange |= foldExtractedCmps(I);
|
||||
MadeChange |= foldBinopOfReductions(I);
|
||||
if (foldExtractExtract(I))
|
||||
return true;
|
||||
if (foldExtractedCmps(I))
|
||||
return true;
|
||||
if (foldBinopOfReductions(I))
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
bool MadeChange = false;
|
||||
for (BasicBlock &BB : F) {
|
||||
// Ignore unreachable basic blocks.
|
||||
if (!DT.isReachableFromEntry(&BB))
|
||||
@ -4209,7 +4249,7 @@ bool VectorCombine::run() {
|
||||
for (Instruction &I : make_early_inc_range(BB)) {
|
||||
if (I.isDebugOrPseudoInst())
|
||||
continue;
|
||||
FoldInst(I);
|
||||
MadeChange |= FoldInst(I);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4223,7 +4263,7 @@ bool VectorCombine::run() {
|
||||
continue;
|
||||
}
|
||||
|
||||
FoldInst(*I);
|
||||
MadeChange |= FoldInst(*I);
|
||||
}
|
||||
|
||||
return MadeChange;
|
||||
|
||||
@ -926,53 +926,17 @@ define void @same_op8_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef
|
||||
; CHECK-NEXT: [[ENTRY:.*]]:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[C]], align 4
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <16 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP5]], align 4
|
||||
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
|
||||
; CHECK-NEXT: [[STRIDED_VEC12:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
|
||||
; CHECK-NEXT: [[STRIDED_VEC13:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
|
||||
; CHECK-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
|
||||
; CHECK-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
|
||||
; CHECK-NEXT: [[STRIDED_VEC16:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
|
||||
; CHECK-NEXT: [[STRIDED_VEC17:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
|
||||
; CHECK-NEXT: [[STRIDED_VEC18:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[WIDE_VEC19:%.*]] = load <16 x float>, ptr [[TMP6]], align 4
|
||||
; CHECK-NEXT: [[STRIDED_VEC20:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
|
||||
; CHECK-NEXT: [[STRIDED_VEC21:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
|
||||
; CHECK-NEXT: [[STRIDED_VEC22:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
|
||||
; CHECK-NEXT: [[STRIDED_VEC23:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
|
||||
; CHECK-NEXT: [[STRIDED_VEC24:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
|
||||
; CHECK-NEXT: [[STRIDED_VEC25:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
|
||||
; CHECK-NEXT: [[STRIDED_VEC26:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
|
||||
; CHECK-NEXT: [[STRIDED_VEC27:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[STRIDED_VEC20]], <2 x float> [[STRIDED_VEC21]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[STRIDED_VEC]], <2 x float> [[STRIDED_VEC12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[TMP8]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[TMP7]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[STRIDED_VEC22]], <2 x float> [[STRIDED_VEC23]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[STRIDED_VEC13]], <2 x float> [[STRIDED_VEC14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <4 x float> [[TMP12]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x float> [[STRIDED_VEC24]], <2 x float> [[STRIDED_VEC25]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[STRIDED_VEC15]], <2 x float> [[STRIDED_VEC16]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = fmul fast <4 x float> [[TMP16]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = fadd fast <4 x float> [[TMP15]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x float> [[STRIDED_VEC26]], <2 x float> [[STRIDED_VEC27]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[STRIDED_VEC17]], <2 x float> [[STRIDED_VEC18]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = fmul fast <4 x float> [[TMP20]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = fadd fast <4 x float> [[TMP19]], [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP23]], <8 x float> [[TMP24]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[TMP1]]
|
||||
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x float> [[WIDE_VEC19]], [[TMP4]]
|
||||
; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 144
|
||||
|
||||
@ -404,13 +404,13 @@ define <16 x i16> @add_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
|
||||
; SSE4-LABEL: @add_v16i16_FEuCBA98765432u0(
|
||||
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 poison, i32 11, i32 12, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 26, i32 29, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 11, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 26, i32 29, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
|
||||
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP9:%.*]] = add <16 x i16> [[TMP7]], [[TMP8]]
|
||||
; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 3, i32 2, i32 poison, i32 1, i32 0, i32 27, i32 26, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
|
||||
; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
|
||||
; SSE4-NEXT: ret <16 x i16> [[RESULT]]
|
||||
;
|
||||
; AVX2-LABEL: @add_v16i16_FEuCBA98765432u0(
|
||||
|
||||
@ -398,13 +398,13 @@ define <16 x i16> @sub_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
|
||||
; SSE4-LABEL: @sub_v16i16_FEuCBA98765432u0(
|
||||
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 poison, i32 10, i32 12, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 27, i32 29, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 27, i32 29, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
|
||||
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; SSE4-NEXT: [[TMP9:%.*]] = sub <16 x i16> [[TMP7]], [[TMP8]]
|
||||
; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 3, i32 2, i32 poison, i32 1, i32 0, i32 27, i32 26, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
|
||||
; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
|
||||
; SSE4-NEXT: ret <16 x i16> [[RESULT]]
|
||||
;
|
||||
; AVX2-LABEL: @sub_v16i16_FEuCBA98765432u0(
|
||||
|
||||
@ -268,7 +268,7 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK-LABEL: @ext5_ext0_add(
|
||||
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
|
||||
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret i8 [[R]]
|
||||
;
|
||||
%e0 = extractelement <16 x i8> %x, i32 5
|
||||
@ -294,7 +294,7 @@ define float @ext1_ext0_fmul(<4 x float> %x) {
|
||||
; CHECK-LABEL: @ext1_ext0_fmul(
|
||||
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]]
|
||||
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
|
||||
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret float [[R]]
|
||||
;
|
||||
%e0 = extractelement <4 x float> %x, i32 1
|
||||
@ -363,7 +363,7 @@ define float @ext7_ext4_fmul_v8f32(<8 x float> %x) {
|
||||
; AVX-LABEL: @ext7_ext4_fmul_v8f32(
|
||||
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison>
|
||||
; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]]
|
||||
; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4
|
||||
; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
|
||||
; AVX-NEXT: ret float [[R]]
|
||||
;
|
||||
%e0 = extractelement <8 x float> %x, i32 7
|
||||
@ -484,7 +484,7 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]]
|
||||
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]]
|
||||
; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
|
||||
; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[Z0123]]
|
||||
;
|
||||
%z = and <4 x i32> %x, %y
|
||||
@ -504,7 +504,7 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]]
|
||||
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
|
||||
; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
|
||||
; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[X210]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
@ -523,7 +523,7 @@ define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
|
||||
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]]
|
||||
; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
|
||||
; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[X2Y210]]
|
||||
;
|
||||
%y0 = extractelement <4 x i32> %y, i32 0
|
||||
|
||||
@ -268,7 +268,7 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK-LABEL: @ext5_ext0_add(
|
||||
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
|
||||
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret i8 [[R]]
|
||||
;
|
||||
%e0 = extractelement <16 x i8> %x, i32 5
|
||||
@ -294,7 +294,7 @@ define float @ext1_ext0_fmul(<4 x float> %x) {
|
||||
; CHECK-LABEL: @ext1_ext0_fmul(
|
||||
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]]
|
||||
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
|
||||
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret float [[R]]
|
||||
;
|
||||
%e0 = extractelement <4 x float> %x, i32 1
|
||||
@ -363,7 +363,7 @@ define float @ext7_ext4_fmul_v8f32(<8 x float> %x) {
|
||||
; AVX-LABEL: @ext7_ext4_fmul_v8f32(
|
||||
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison>
|
||||
; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]]
|
||||
; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4
|
||||
; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
|
||||
; AVX-NEXT: ret float [[R]]
|
||||
;
|
||||
%e0 = extractelement <8 x float> %x, i32 7
|
||||
@ -490,7 +490,7 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]]
|
||||
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]]
|
||||
; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
|
||||
; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[Z0123]]
|
||||
;
|
||||
%z = and <4 x i32> %x, %y
|
||||
@ -510,7 +510,7 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]]
|
||||
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
|
||||
; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
|
||||
; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[X210]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
@ -529,7 +529,7 @@ define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
|
||||
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]]
|
||||
; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
|
||||
; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[X2Y210]]
|
||||
;
|
||||
%y0 = extractelement <4 x i32> %y, i32 0
|
||||
@ -573,10 +573,8 @@ define i64 @instsimplify_folder_crash(<4 x i64> %in) {
|
||||
; CHECK-LABEL: @instsimplify_folder_crash(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[SHUFFLE_1:%.*]] = shufflevector <4 x i64> [[IN:%.*]], <4 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i64> zeroinitializer, i64 0
|
||||
; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i64> [[SHUFFLE_1]], i64 1
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i64 [[E_1]], [[E_0]]
|
||||
; CHECK-NEXT: ret i64 [[OR]]
|
||||
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i64> [[SHUFFLE_1]], <4 x i64> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: ret i64 0
|
||||
;
|
||||
entry:
|
||||
%shuffle.1 = shufflevector <4 x i64> %in, <4 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
|
||||
@ -130,7 +130,7 @@ define i1 @cmp10_v2f64(<2 x double> %x, <2 x double> %y) {
|
||||
; AVX-LABEL: @cmp10_v2f64(
|
||||
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
|
||||
; AVX-NEXT: [[TMP1:%.*]] = fcmp ule <2 x double> [[SHIFT]], [[Y:%.*]]
|
||||
; AVX-NEXT: [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
|
||||
; AVX-NEXT: [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
|
||||
; AVX-NEXT: ret i1 [[CMP]]
|
||||
;
|
||||
%x1 = extractelement <2 x double> %x, i32 1
|
||||
|
||||
@ -27,6 +27,8 @@ define void @multiple_extract(ptr %p) {
|
||||
; infinite loop if we fold an extract that is waiting to be erased
|
||||
define void @unused_extract(ptr %p) {
|
||||
; CHECK-LABEL: @unused_extract(
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, ptr [[P:%.*]], align 8
|
||||
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[LOAD]], i64 1
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%load = load <4 x float>, ptr %p, align 8
|
||||
|
||||
@ -253,7 +253,8 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
|
||||
; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
|
||||
; CHECK-NEXT: [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
|
||||
; CHECK-NEXT: ret <8 x i16> [[R]]
|
||||
; CHECK-NEXT: [[R1:%.*]] = shufflevector <8 x i16> [[R]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: ret <8 x i16> [[R1]]
|
||||
;
|
||||
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
|
||||
%s = load i16, ptr %gep, align 2
|
||||
@ -341,7 +342,8 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %
|
||||
; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
|
||||
; CHECK-NEXT: [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
|
||||
; CHECK-NEXT: ret <8 x i16> [[R]]
|
||||
; CHECK-NEXT: [[R1:%.*]] = shufflevector <8 x i16> [[R]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
; CHECK-NEXT: ret <8 x i16> [[R1]]
|
||||
;
|
||||
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
|
||||
%s = load i16, ptr %gep, align 16
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user