[VPlan] Explicitly replicate VPInstructions by VF. (#155102)
Extend replicateByVF added in #142433 (aa240293190) to also explicitly unroll replicating VPInstructions. Now the only remaining case where we replicate for all lanes is VPReplicateRecipes in replicate regions. PR: https://github.com/llvm/llvm-project/pull/155102
This commit is contained in:
parent
ed1f1b88e4
commit
b8eaceb39b
@ -343,37 +343,21 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
|
||||
LastLane = 0;
|
||||
}
|
||||
|
||||
auto *LastInst = cast<Instruction>(get(Def, LastLane));
|
||||
// We need to construct the vector value for a single-scalar value by
|
||||
// broadcasting the scalar to all lanes.
|
||||
// TODO: Replace by introducing Broadcast VPInstructions.
|
||||
assert(IsSingleScalar && "must be a single-scalar at this point");
|
||||
// Set the insert point after the last scalarized instruction or after the
|
||||
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
|
||||
// will directly follow the scalar definitions.
|
||||
auto OldIP = Builder.saveIP();
|
||||
auto *LastInst = cast<Instruction>(get(Def, LastLane));
|
||||
auto NewIP = isa<PHINode>(LastInst)
|
||||
? LastInst->getParent()->getFirstNonPHIIt()
|
||||
: std::next(BasicBlock::iterator(LastInst));
|
||||
Builder.SetInsertPoint(&*NewIP);
|
||||
|
||||
// However, if we are vectorizing, we need to construct the vector values.
|
||||
// If the value is known to be uniform after vectorization, we can just
|
||||
// broadcast the scalar value corresponding to lane zero. Otherwise, we
|
||||
// construct the vector values using insertelement instructions. Since the
|
||||
// resulting vectors are stored in State, we will only generate the
|
||||
// insertelements once.
|
||||
Value *VectorValue = nullptr;
|
||||
if (IsSingleScalar) {
|
||||
VectorValue = GetBroadcastInstrs(ScalarValue);
|
||||
set(Def, VectorValue);
|
||||
} else {
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
assert(isa<VPInstruction>(Def) &&
|
||||
"Explicit BuildVector recipes must have"
|
||||
"handled packing for non-VPInstructions.");
|
||||
// Initialize packing with insertelements to start from poison.
|
||||
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
|
||||
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
|
||||
VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane);
|
||||
set(Def, VectorValue);
|
||||
}
|
||||
Value *VectorValue = GetBroadcastInstrs(ScalarValue);
|
||||
set(Def, VectorValue);
|
||||
Builder.restoreIP(OldIP);
|
||||
return VectorValue;
|
||||
}
|
||||
|
||||
@ -908,6 +908,8 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
|
||||
return R && classof(R);
|
||||
}
|
||||
|
||||
virtual VPRecipeWithIRFlags *clone() override = 0;
|
||||
|
||||
static inline bool classof(const VPSingleDefRecipe *U) {
|
||||
auto *R = dyn_cast<VPRecipeBase>(U);
|
||||
return R && classof(R);
|
||||
@ -1061,13 +1063,6 @@ public:
|
||||
VScale,
|
||||
};
|
||||
|
||||
private:
|
||||
typedef unsigned char OpcodeTy;
|
||||
OpcodeTy Opcode;
|
||||
|
||||
/// An optional name that can be used for the generated IR instruction.
|
||||
const std::string Name;
|
||||
|
||||
/// Returns true if this VPInstruction generates scalar values for all lanes.
|
||||
/// Most VPInstructions generate a single value per part, either vector or
|
||||
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
|
||||
@ -1076,6 +1071,13 @@ private:
|
||||
/// underlying ingredient.
|
||||
bool doesGeneratePerAllLanes() const;
|
||||
|
||||
private:
|
||||
typedef unsigned char OpcodeTy;
|
||||
OpcodeTy Opcode;
|
||||
|
||||
/// An optional name that can be used for the generated IR instruction.
|
||||
const std::string Name;
|
||||
|
||||
/// Returns true if we can generate a scalar for the first lane only if
|
||||
/// needed.
|
||||
bool canGenerateScalarForFirstLane() const;
|
||||
@ -1085,11 +1087,6 @@ private:
|
||||
/// existing value is returned rather than a generated one.
|
||||
Value *generate(VPTransformState &State);
|
||||
|
||||
/// Utility methods serving execute(): generates a scalar single instance of
|
||||
/// the modeled instruction for a given lane. \returns the scalar generated
|
||||
/// value for lane \p Lane.
|
||||
Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
/// Return the number of operands determined by the opcode of the
|
||||
/// VPInstruction. Returns -1u if the number of operands cannot be determined
|
||||
|
||||
@ -564,16 +564,6 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
|
||||
}
|
||||
}
|
||||
|
||||
Value *VPInstruction::generatePerLane(VPTransformState &State,
|
||||
const VPLane &Lane) {
|
||||
IRBuilderBase &Builder = State.Builder;
|
||||
|
||||
assert(getOpcode() == VPInstruction::PtrAdd &&
|
||||
"only PtrAdd opcodes are supported for now");
|
||||
return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
|
||||
State.get(getOperand(1), Lane), Name);
|
||||
}
|
||||
|
||||
/// Create a conditional branch using \p Cond branching to the successors of \p
|
||||
/// VPBB. Note that the first successor is always forward (i.e. not created yet)
|
||||
/// while the second successor may already have been created (if it is a header
|
||||
@ -1197,24 +1187,13 @@ void VPInstruction::execute(VPTransformState &State) {
|
||||
"Set flags not supported for the provided opcode");
|
||||
if (hasFastMathFlags())
|
||||
State.Builder.setFastMathFlags(getFastMathFlags());
|
||||
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
|
||||
(vputils::onlyFirstLaneUsed(this) ||
|
||||
isVectorToScalar() || isSingleScalar());
|
||||
bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
|
||||
if (GeneratesPerAllLanes) {
|
||||
for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
|
||||
Lane != NumLanes; ++Lane) {
|
||||
Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
|
||||
assert(GeneratedValue && "generatePerLane must produce a value");
|
||||
State.set(this, GeneratedValue, VPLane(Lane));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
Value *GeneratedValue = generate(State);
|
||||
if (!hasResult())
|
||||
return;
|
||||
assert(GeneratedValue && "generate must produce a value");
|
||||
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
|
||||
(vputils::onlyFirstLaneUsed(this) ||
|
||||
isVectorToScalar() || isSingleScalar());
|
||||
assert((((GeneratedValue->getType()->isVectorTy() ||
|
||||
GeneratedValue->getType()->isStructTy()) ==
|
||||
!GeneratesPerFirstLaneOnly) ||
|
||||
@ -1287,6 +1266,12 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
|
||||
case VPInstruction::Broadcast:
|
||||
case VPInstruction::ReductionStartVector:
|
||||
return true;
|
||||
case VPInstruction::BuildStructVector:
|
||||
case VPInstruction::BuildVector:
|
||||
// Before replicating by VF, Build(Struct)Vector uses all lanes of the
|
||||
// operand, after replicating its operands only the first lane is used.
|
||||
// Before replicating, it will have only a single operand.
|
||||
return getNumOperands() > 1;
|
||||
case VPInstruction::PtrAdd:
|
||||
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
|
||||
case VPInstruction::WidePtrAdd:
|
||||
|
||||
@ -3695,34 +3695,39 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
|
||||
vp_depth_first_shallow(Plan.getEntry()));
|
||||
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
|
||||
vp_depth_first_shallow(LoopRegion->getEntry()));
|
||||
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
|
||||
// excluding ones in replicate regions. Those are not materialized explicitly
|
||||
// yet. Those vector users are still handled in VPReplicateRegion::execute(),
|
||||
// via shouldPack().
|
||||
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes and
|
||||
// VPInstructions, excluding ones in replicate regions. Those are not
|
||||
// materialized explicitly yet. Those vector users are still handled in
|
||||
// VPReplicateRegion::execute(), via shouldPack().
|
||||
// TODO: materialize build vectors for replicating recipes in replicating
|
||||
// regions.
|
||||
// TODO: materialize build vectors for VPInstructions.
|
||||
for (VPBasicBlock *VPBB :
|
||||
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
|
||||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
|
||||
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
|
||||
auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
|
||||
if (!isa<VPReplicateRecipe, VPInstruction>(&R))
|
||||
continue;
|
||||
auto *DefR = cast<VPRecipeWithIRFlags>(&R);
|
||||
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
|
||||
VPRegionBlock *ParentRegion =
|
||||
cast<VPRecipeBase>(U)->getParent()->getParent();
|
||||
return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
|
||||
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
|
||||
};
|
||||
if (!RepR || RepR->isSingleScalar() ||
|
||||
none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
|
||||
if ((isa<VPReplicateRecipe>(DefR) &&
|
||||
cast<VPReplicateRecipe>(DefR)->isSingleScalar()) ||
|
||||
(isa<VPInstruction>(DefR) &&
|
||||
(vputils::onlyFirstLaneUsed(DefR) ||
|
||||
!cast<VPInstruction>(DefR)->doesGeneratePerAllLanes())) ||
|
||||
none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
|
||||
continue;
|
||||
|
||||
Type *ScalarTy = TypeInfo.inferScalarType(RepR);
|
||||
Type *ScalarTy = TypeInfo.inferScalarType(DefR);
|
||||
unsigned Opcode = ScalarTy->isStructTy()
|
||||
? VPInstruction::BuildStructVector
|
||||
: VPInstruction::BuildVector;
|
||||
auto *BuildVector = new VPInstruction(Opcode, {RepR});
|
||||
BuildVector->insertAfter(RepR);
|
||||
auto *BuildVector = new VPInstruction(Opcode, {DefR});
|
||||
BuildVector->insertAfter(DefR);
|
||||
|
||||
RepR->replaceUsesWithIf(
|
||||
DefR->replaceUsesWithIf(
|
||||
BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
|
||||
VPUser &U, unsigned) {
|
||||
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
|
||||
|
||||
@ -158,10 +158,10 @@ struct VPlanTransforms {
|
||||
/// Explicitly unroll \p Plan by \p UF.
|
||||
static void unrollByUF(VPlan &Plan, unsigned UF);
|
||||
|
||||
/// Replace each VPReplicateRecipe outside on any replicate region in \p Plan
|
||||
/// with \p VF single-scalar recipes.
|
||||
/// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby
|
||||
/// dissolving the latter.
|
||||
/// Replace each replicating VPReplicateRecipe and VPInstruction outside of
|
||||
/// any replicate region in \p Plan with \p VF single-scalar recipes.
|
||||
/// TODO: Also replicate VPScalarIVSteps and VPReplicateRecipes inside
|
||||
/// replicate regions, thereby dissolving the latter.
|
||||
static void replicateByVF(VPlan &Plan, ElementCount VF);
|
||||
|
||||
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
|
||||
|
||||
@ -463,15 +463,16 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
|
||||
VPlanTransforms::removeDeadRecipes(Plan);
|
||||
}
|
||||
|
||||
/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
|
||||
/// Def2LaneDefs to look up scalar definitions for operands of \RepR.
|
||||
static VPReplicateRecipe *
|
||||
/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
|
||||
/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
|
||||
/// definitions for operands of \DefR.
|
||||
static VPRecipeWithIRFlags *
|
||||
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
|
||||
VPReplicateRecipe *RepR, VPLane Lane,
|
||||
VPRecipeWithIRFlags *DefR, VPLane Lane,
|
||||
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
|
||||
// Collect the operands at Lane, creating extracts as needed.
|
||||
SmallVector<VPValue *> NewOps;
|
||||
for (VPValue *Op : RepR->operands()) {
|
||||
for (VPValue *Op : DefR->operands()) {
|
||||
// If Op is a definition that has been unrolled, directly use the clone for
|
||||
// the corresponding lane.
|
||||
auto LaneDefs = Def2LaneDefs.find(Op);
|
||||
@ -501,11 +502,24 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
|
||||
NewOps.push_back(Ext);
|
||||
}
|
||||
|
||||
auto *New =
|
||||
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
|
||||
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
|
||||
New->transferFlags(*RepR);
|
||||
New->insertBefore(RepR);
|
||||
VPRecipeWithIRFlags *New;
|
||||
if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
|
||||
// TODO: have cloning of replicate recipes also provide the desired result
|
||||
// coupled with setting its operands to NewOps (deriving IsSingleScalar and
|
||||
// Mask from the operands?)
|
||||
New =
|
||||
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
|
||||
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
|
||||
} else {
|
||||
assert(isa<VPInstruction>(DefR) &&
|
||||
"DefR must be a VPReplicateRecipe or VPInstruction");
|
||||
New = DefR->clone();
|
||||
for (const auto &[Idx, Op] : enumerate(NewOps)) {
|
||||
New->setOperand(Idx, Op);
|
||||
}
|
||||
}
|
||||
New->transferFlags(*DefR);
|
||||
New->insertBefore(DefR);
|
||||
return New;
|
||||
}
|
||||
|
||||
@ -530,34 +544,38 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
|
||||
SmallVector<VPRecipeBase *> ToRemove;
|
||||
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
|
||||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
|
||||
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
|
||||
if (!RepR || RepR->isSingleScalar())
|
||||
if (!isa<VPInstruction, VPReplicateRecipe>(&R) ||
|
||||
(isa<VPReplicateRecipe>(&R) &&
|
||||
cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
|
||||
(isa<VPInstruction>(&R) &&
|
||||
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes()))
|
||||
continue;
|
||||
|
||||
VPBuilder Builder(RepR);
|
||||
if (RepR->getNumUsers() == 0) {
|
||||
// Create single-scalar version of RepR for all lanes.
|
||||
auto *DefR = cast<VPRecipeWithIRFlags>(&R);
|
||||
VPBuilder Builder(DefR);
|
||||
if (DefR->getNumUsers() == 0) {
|
||||
// Create single-scalar version of DefR for all lanes.
|
||||
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
|
||||
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
|
||||
RepR->eraseFromParent();
|
||||
cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
|
||||
DefR->eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
/// Create single-scalar version of RepR for all lanes.
|
||||
/// Create single-scalar version of DefR for all lanes.
|
||||
SmallVector<VPValue *> LaneDefs;
|
||||
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
|
||||
LaneDefs.push_back(
|
||||
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
|
||||
cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs));
|
||||
|
||||
Def2LaneDefs[RepR] = LaneDefs;
|
||||
Def2LaneDefs[DefR] = LaneDefs;
|
||||
/// Users that only demand the first lane can use the definition for lane
|
||||
/// 0.
|
||||
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
|
||||
return U.onlyFirstLaneUsed(RepR);
|
||||
DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
|
||||
return U.onlyFirstLaneUsed(DefR);
|
||||
});
|
||||
|
||||
// Update each build vector user that currently has RepR as its only
|
||||
// Update each build vector user that currently has DefR as its only
|
||||
// operand, to have all LaneDefs as its operands.
|
||||
for (VPUser *U : to_vector(RepR->users())) {
|
||||
for (VPUser *U : to_vector(DefR->users())) {
|
||||
auto *VPI = dyn_cast<VPInstruction>(U);
|
||||
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
|
||||
VPI->getOpcode() != VPInstruction::BuildStructVector))
|
||||
@ -569,7 +587,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
|
||||
for (VPValue *LaneDef : drop_begin(LaneDefs))
|
||||
VPI->addOperand(LaneDef);
|
||||
}
|
||||
ToRemove.push_back(RepR);
|
||||
ToRemove.push_back(DefR);
|
||||
}
|
||||
}
|
||||
for (auto *R : reverse(ToRemove))
|
||||
|
||||
@ -33,6 +33,10 @@ define void @a(ptr readnone %b) {
|
||||
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
|
||||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
|
||||
@ -649,9 +653,6 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) {
|
||||
; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]]
|
||||
; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP1]]
|
||||
; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]]
|
||||
; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]]
|
||||
; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]]
|
||||
; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]]
|
||||
; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
|
||||
; STRIDED-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[INDEX]]
|
||||
; STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8
|
||||
|
||||
@ -22,6 +22,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
||||
; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
|
||||
; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
|
||||
; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
|
||||
; IC1-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
|
||||
; IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1
|
||||
; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
|
||||
; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12)
|
||||
; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13)
|
||||
@ -117,8 +119,12 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
||||
; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
|
||||
; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
|
||||
; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
|
||||
; IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
|
||||
; IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1
|
||||
; IC2-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
|
||||
; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
|
||||
; IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0
|
||||
; IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1
|
||||
; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
|
||||
; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
|
||||
; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1
|
||||
@ -338,21 +344,21 @@ define void @switch_to_header(ptr %start) {
|
||||
; IC1-NEXT: [[ENTRY:.*]]:
|
||||
; IC1-NEXT: br label %[[LOOP_HEADER:.*]]
|
||||
; IC1: [[LOOP_HEADER]]:
|
||||
; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
|
||||
; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
|
||||
; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
|
||||
; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
|
||||
; IC1-NEXT: i64 120, label %[[IF_THEN]]
|
||||
; IC1-NEXT: i64 120, label %[[IF_THEN1]]
|
||||
; IC1-NEXT: i64 100, label %[[LOOP_LATCH]]
|
||||
; IC1-NEXT: ]
|
||||
; IC1: [[IF_THEN]]:
|
||||
; IC1: [[IF_THEN1]]:
|
||||
; IC1-NEXT: br label %[[LOOP_HEADER]]
|
||||
; IC1: [[IF_THEN1:.*:]]
|
||||
; IC1: [[IF_THEN:.*:]]
|
||||
; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
|
||||
; IC1-NEXT: store i64 42, ptr [[GEP]], align 1
|
||||
; IC1-NEXT: unreachable
|
||||
; IC1: [[LOOP_LATCH]]:
|
||||
; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
|
||||
; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
|
||||
; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
|
||||
; IC1: [[EXIT]]:
|
||||
; IC1-NEXT: ret void
|
||||
;
|
||||
@ -361,21 +367,21 @@ define void @switch_to_header(ptr %start) {
|
||||
; IC2-NEXT: [[ENTRY:.*]]:
|
||||
; IC2-NEXT: br label %[[LOOP_HEADER:.*]]
|
||||
; IC2: [[LOOP_HEADER]]:
|
||||
; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
|
||||
; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
|
||||
; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
|
||||
; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
|
||||
; IC2-NEXT: i64 120, label %[[IF_THEN]]
|
||||
; IC2-NEXT: i64 120, label %[[IF_THEN1]]
|
||||
; IC2-NEXT: i64 100, label %[[LOOP_LATCH]]
|
||||
; IC2-NEXT: ]
|
||||
; IC2: [[IF_THEN]]:
|
||||
; IC2: [[IF_THEN1]]:
|
||||
; IC2-NEXT: br label %[[LOOP_HEADER]]
|
||||
; IC2: [[IF_THEN1:.*:]]
|
||||
; IC2: [[IF_THEN:.*:]]
|
||||
; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
|
||||
; IC2-NEXT: store i64 42, ptr [[GEP]], align 1
|
||||
; IC2-NEXT: unreachable
|
||||
; IC2: [[LOOP_LATCH]]:
|
||||
; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
|
||||
; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
|
||||
; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
|
||||
; IC2: [[EXIT]]:
|
||||
; IC2-NEXT: ret void
|
||||
;
|
||||
|
||||
@ -22,7 +22,11 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
||||
; CHECK-NEXT: vector.body:
|
||||
; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
|
||||
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>
|
||||
; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
|
||||
; CHECK-NEXT: EMIT vp<[[STEP1:%.+]]> = extractelement vp<[[STEPS]]>, ir<0>
|
||||
; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEP1]]>
|
||||
; CHECK-NEXT: EMIT vp<[[STEP2:%.+]]> = extractelement vp<[[STEPS]]>, ir<1>
|
||||
; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEP2]]>
|
||||
; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1
|
||||
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]>
|
||||
; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
|
||||
; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
|
||||
@ -36,7 +40,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
||||
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR]]>
|
||||
; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR_VEC]]>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
@ -53,7 +57,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
||||
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR]]>
|
||||
; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR_VEC]]>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
@ -70,7 +74,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
||||
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR]]>
|
||||
; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR_VEC]]>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user