[VPlan] Explicitly replicate VPInstructions by VF. (#155102)

Extend replicateByVF added in #142433 (aa240293190) to also explicitly
unroll replicating VPInstructions.

Now the only remaining case where we replicate for all lanes is
VPReplicateRecipes in replicate regions.

PR: https://github.com/llvm/llvm-project/pull/155102
This commit is contained in:
Florian Hahn 2025-09-12 17:06:26 +01:00 committed by GitHub
parent ed1f1b88e4
commit b8eaceb39b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 119 additions and 119 deletions

View File

@ -343,37 +343,21 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
LastLane = 0;
}
auto *LastInst = cast<Instruction>(get(Def, LastLane));
// We need to construct the vector value for a single-scalar value by
// broadcasting the scalar to all lanes.
// TODO: Replace by introducing Broadcast VPInstructions.
assert(IsSingleScalar && "must be a single-scalar at this point");
// Set the insert point after the last scalarized instruction or after the
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
// will directly follow the scalar definitions.
auto OldIP = Builder.saveIP();
auto *LastInst = cast<Instruction>(get(Def, LastLane));
auto NewIP = isa<PHINode>(LastInst)
? LastInst->getParent()->getFirstNonPHIIt()
: std::next(BasicBlock::iterator(LastInst));
Builder.SetInsertPoint(&*NewIP);
// However, if we are vectorizing, we need to construct the vector values.
// If the value is known to be uniform after vectorization, we can just
// broadcast the scalar value corresponding to lane zero. Otherwise, we
// construct the vector values using insertelement instructions. Since the
// resulting vectors are stored in State, we will only generate the
// insertelements once.
Value *VectorValue = nullptr;
if (IsSingleScalar) {
VectorValue = GetBroadcastInstrs(ScalarValue);
set(Def, VectorValue);
} else {
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
assert(isa<VPInstruction>(Def) &&
"Explicit BuildVector recipes must have"
"handled packing for non-VPInstructions.");
// Initialize packing with insertelements to start from poison.
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane);
set(Def, VectorValue);
}
Value *VectorValue = GetBroadcastInstrs(ScalarValue);
set(Def, VectorValue);
Builder.restoreIP(OldIP);
return VectorValue;
}

View File

@ -908,6 +908,8 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
return R && classof(R);
}
virtual VPRecipeWithIRFlags *clone() override = 0;
static inline bool classof(const VPSingleDefRecipe *U) {
auto *R = dyn_cast<VPRecipeBase>(U);
return R && classof(R);
@ -1061,13 +1063,6 @@ public:
VScale,
};
private:
typedef unsigned char OpcodeTy;
OpcodeTy Opcode;
/// An optional name that can be used for the generated IR instruction.
const std::string Name;
/// Returns true if this VPInstruction generates scalar values for all lanes.
/// Most VPInstructions generate a single value per part, either vector or
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
@ -1076,6 +1071,13 @@ private:
/// underlying ingredient.
bool doesGeneratePerAllLanes() const;
private:
typedef unsigned char OpcodeTy;
OpcodeTy Opcode;
/// An optional name that can be used for the generated IR instruction.
const std::string Name;
/// Returns true if we can generate a scalar for the first lane only if
/// needed.
bool canGenerateScalarForFirstLane() const;
@ -1085,11 +1087,6 @@ private:
/// existing value is returned rather than a generated one.
Value *generate(VPTransformState &State);
/// Utility methods serving execute(): generates a scalar single instance of
/// the modeled instruction for a given lane. \returns the scalar generated
/// value for lane \p Lane.
Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
#if !defined(NDEBUG)
/// Return the number of operands determined by the opcode of the
/// VPInstruction. Returns -1u if the number of operands cannot be determined

View File

@ -564,16 +564,6 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
}
}
Value *VPInstruction::generatePerLane(VPTransformState &State,
const VPLane &Lane) {
IRBuilderBase &Builder = State.Builder;
assert(getOpcode() == VPInstruction::PtrAdd &&
"only PtrAdd opcodes are supported for now");
return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
State.get(getOperand(1), Lane), Name);
}
/// Create a conditional branch using \p Cond branching to the successors of \p
/// VPBB. Note that the first successor is always forward (i.e. not created yet)
/// while the second successor may already have been created (if it is a header
@ -1197,24 +1187,13 @@ void VPInstruction::execute(VPTransformState &State) {
"Set flags not supported for the provided opcode");
if (hasFastMathFlags())
State.Builder.setFastMathFlags(getFastMathFlags());
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
(vputils::onlyFirstLaneUsed(this) ||
isVectorToScalar() || isSingleScalar());
bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
if (GeneratesPerAllLanes) {
for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
Lane != NumLanes; ++Lane) {
Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
assert(GeneratedValue && "generatePerLane must produce a value");
State.set(this, GeneratedValue, VPLane(Lane));
}
return;
}
Value *GeneratedValue = generate(State);
if (!hasResult())
return;
assert(GeneratedValue && "generate must produce a value");
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
(vputils::onlyFirstLaneUsed(this) ||
isVectorToScalar() || isSingleScalar());
assert((((GeneratedValue->getType()->isVectorTy() ||
GeneratedValue->getType()->isStructTy()) ==
!GeneratesPerFirstLaneOnly) ||
@ -1287,6 +1266,12 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::Broadcast:
case VPInstruction::ReductionStartVector:
return true;
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
// Before replicating by VF, Build(Struct)Vector uses all lanes of the
// operand, after replicating its operands only the first lane is used.
// Before replicating, it will have only a single operand.
return getNumOperands() > 1;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
case VPInstruction::WidePtrAdd:

View File

@ -3695,34 +3695,39 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
vp_depth_first_shallow(Plan.getEntry()));
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()));
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
// excluding ones in replicate regions. Those are not materialized explicitly
// yet. Those vector users are still handled in VPReplicateRegion::execute(),
// via shouldPack().
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes and
// VPInstructions, excluding ones in replicate regions. Those are not
// materialized explicitly yet. Those vector users are still handled in
// VPReplicateRegion::execute(), via shouldPack().
// TODO: materialize build vectors for replicating recipes in replicating
// regions.
// TODO: materialize build vectors for VPInstructions.
for (VPBasicBlock *VPBB :
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
if (!isa<VPReplicateRecipe, VPInstruction>(&R))
continue;
auto *DefR = cast<VPRecipeWithIRFlags>(&R);
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
VPRegionBlock *ParentRegion =
cast<VPRecipeBase>(U)->getParent()->getParent();
return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
};
if (!RepR || RepR->isSingleScalar() ||
none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
if ((isa<VPReplicateRecipe>(DefR) &&
cast<VPReplicateRecipe>(DefR)->isSingleScalar()) ||
(isa<VPInstruction>(DefR) &&
(vputils::onlyFirstLaneUsed(DefR) ||
!cast<VPInstruction>(DefR)->doesGeneratePerAllLanes())) ||
none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
continue;
Type *ScalarTy = TypeInfo.inferScalarType(RepR);
Type *ScalarTy = TypeInfo.inferScalarType(DefR);
unsigned Opcode = ScalarTy->isStructTy()
? VPInstruction::BuildStructVector
: VPInstruction::BuildVector;
auto *BuildVector = new VPInstruction(Opcode, {RepR});
BuildVector->insertAfter(RepR);
auto *BuildVector = new VPInstruction(Opcode, {DefR});
BuildVector->insertAfter(DefR);
RepR->replaceUsesWithIf(
DefR->replaceUsesWithIf(
BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
VPUser &U, unsigned) {
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);

View File

@ -158,10 +158,10 @@ struct VPlanTransforms {
/// Explicitly unroll \p Plan by \p UF.
static void unrollByUF(VPlan &Plan, unsigned UF);
/// Replace each VPReplicateRecipe outside on any replicate region in \p Plan
/// with \p VF single-scalar recipes.
/// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby
/// dissolving the latter.
/// Replace each replicating VPReplicateRecipe and VPInstruction outside of
/// any replicate region in \p Plan with \p VF single-scalar recipes.
/// TODO: Also replicate VPScalarIVSteps and VPReplicateRecipes inside
/// replicate regions, thereby dissolving the latter.
static void replicateByVF(VPlan &Plan, ElementCount VF);
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the

View File

@ -463,15 +463,16 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
VPlanTransforms::removeDeadRecipes(Plan);
}
/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
/// Def2LaneDefs to look up scalar definitions for operands of \RepR.
static VPReplicateRecipe *
/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
/// definitions for operands of \DefR.
static VPRecipeWithIRFlags *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
VPReplicateRecipe *RepR, VPLane Lane,
VPRecipeWithIRFlags *DefR, VPLane Lane,
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
for (VPValue *Op : RepR->operands()) {
for (VPValue *Op : DefR->operands()) {
// If Op is a definition that has been unrolled, directly use the clone for
// the corresponding lane.
auto LaneDefs = Def2LaneDefs.find(Op);
@ -501,11 +502,24 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
NewOps.push_back(Ext);
}
auto *New =
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
New->transferFlags(*RepR);
New->insertBefore(RepR);
VPRecipeWithIRFlags *New;
if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
// TODO: have cloning of replicate recipes also provide the desired result
// coupled with setting its operands to NewOps (deriving IsSingleScalar and
// Mask from the operands?)
New =
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
} else {
assert(isa<VPInstruction>(DefR) &&
"DefR must be a VPReplicateRecipe or VPInstruction");
New = DefR->clone();
for (const auto &[Idx, Op] : enumerate(NewOps)) {
New->setOperand(Idx, Op);
}
}
New->transferFlags(*DefR);
New->insertBefore(DefR);
return New;
}
@ -530,34 +544,38 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
if (!RepR || RepR->isSingleScalar())
if (!isa<VPInstruction, VPReplicateRecipe>(&R) ||
(isa<VPReplicateRecipe>(&R) &&
cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
(isa<VPInstruction>(&R) &&
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes()))
continue;
VPBuilder Builder(RepR);
if (RepR->getNumUsers() == 0) {
// Create single-scalar version of RepR for all lanes.
auto *DefR = cast<VPRecipeWithIRFlags>(&R);
VPBuilder Builder(DefR);
if (DefR->getNumUsers() == 0) {
// Create single-scalar version of DefR for all lanes.
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
RepR->eraseFromParent();
cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
DefR->eraseFromParent();
continue;
}
/// Create single-scalar version of RepR for all lanes.
/// Create single-scalar version of DefR for all lanes.
SmallVector<VPValue *> LaneDefs;
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
LaneDefs.push_back(
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs));
Def2LaneDefs[RepR] = LaneDefs;
Def2LaneDefs[DefR] = LaneDefs;
/// Users that only demand the first lane can use the definition for lane
/// 0.
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
return U.onlyFirstLaneUsed(RepR);
DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
return U.onlyFirstLaneUsed(DefR);
});
// Update each build vector user that currently has RepR as its only
// Update each build vector user that currently has DefR as its only
// operand, to have all LaneDefs as its operands.
for (VPUser *U : to_vector(RepR->users())) {
for (VPUser *U : to_vector(DefR->users())) {
auto *VPI = dyn_cast<VPInstruction>(U);
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
VPI->getOpcode() != VPInstruction::BuildStructVector))
@ -569,7 +587,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
for (VPValue *LaneDef : drop_begin(LaneDefs))
VPI->addOperand(LaneDef);
}
ToRemove.push_back(RepR);
ToRemove.push_back(DefR);
}
}
for (auto *R : reverse(ToRemove))

View File

@ -33,6 +33,10 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
@ -649,9 +653,6 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) {
; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]]
; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP1]]
; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]]
; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]]
; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]]
; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]]
; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; STRIDED-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[INDEX]]
; STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8

View File

@ -22,6 +22,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
; IC1-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
; IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1
; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12)
; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13)
@ -117,8 +119,12 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
; IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
; IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1
; IC2-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0
; IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1
; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1
@ -338,21 +344,21 @@ define void @switch_to_header(ptr %start) {
; IC1-NEXT: [[ENTRY:.*]]:
; IC1-NEXT: br label %[[LOOP_HEADER:.*]]
; IC1: [[LOOP_HEADER]]:
; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
; IC1-NEXT: i64 120, label %[[IF_THEN]]
; IC1-NEXT: i64 120, label %[[IF_THEN1]]
; IC1-NEXT: i64 100, label %[[LOOP_LATCH]]
; IC1-NEXT: ]
; IC1: [[IF_THEN]]:
; IC1: [[IF_THEN1]]:
; IC1-NEXT: br label %[[LOOP_HEADER]]
; IC1: [[IF_THEN1:.*:]]
; IC1: [[IF_THEN:.*:]]
; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
; IC1-NEXT: store i64 42, ptr [[GEP]], align 1
; IC1-NEXT: unreachable
; IC1: [[LOOP_LATCH]]:
; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
; IC1: [[EXIT]]:
; IC1-NEXT: ret void
;
@ -361,21 +367,21 @@ define void @switch_to_header(ptr %start) {
; IC2-NEXT: [[ENTRY:.*]]:
; IC2-NEXT: br label %[[LOOP_HEADER:.*]]
; IC2: [[LOOP_HEADER]]:
; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
; IC2-NEXT: i64 120, label %[[IF_THEN]]
; IC2-NEXT: i64 120, label %[[IF_THEN1]]
; IC2-NEXT: i64 100, label %[[LOOP_LATCH]]
; IC2-NEXT: ]
; IC2: [[IF_THEN]]:
; IC2: [[IF_THEN1]]:
; IC2-NEXT: br label %[[LOOP_HEADER]]
; IC2: [[IF_THEN1:.*:]]
; IC2: [[IF_THEN:.*:]]
; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
; IC2-NEXT: store i64 42, ptr [[GEP]], align 1
; IC2-NEXT: unreachable
; IC2: [[LOOP_LATCH]]:
; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
; IC2: [[EXIT]]:
; IC2-NEXT: ret void
;

View File

@ -22,7 +22,11 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>
; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
; CHECK-NEXT: EMIT vp<[[STEP1:%.+]]> = extractelement vp<[[STEPS]]>, ir<0>
; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEP1]]>
; CHECK-NEXT: EMIT vp<[[STEP2:%.+]]> = extractelement vp<[[STEPS]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEP2]]>
; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]>
; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
@ -36,7 +40,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR]]>
; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR_VEC]]>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
@ -53,7 +57,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR]]>
; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR_VEC]]>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
@ -70,7 +74,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR]]>
; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR_VEC]]>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue: