[VPlan] Materialize Build(Struct)Vectors for VPReplicateRecipes. (NFCI) (#151487)
Materialize Build(Struct)Vectors explicitly for VPReplicateRecipes, to serve their users requiring a vector, instead of doing so when unrolling by VF. Now we only need to implicitly build vectors in VPTransformState::get for VPInstructions. Once they are also unrolled by VF, we can remove the code-path altogether. PR: https://github.com/llvm/llvm-project/pull/151487
This commit is contained in:
parent
f5a648f919
commit
7e9989390d
@ -7254,8 +7254,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
|
||||
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
|
||||
// cost model is complete for better cost estimates.
|
||||
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF);
|
||||
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
|
||||
VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
|
||||
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
|
||||
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
|
||||
bool HasBranchWeights =
|
||||
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator());
|
||||
if (HasBranchWeights) {
|
||||
|
@ -355,6 +355,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
|
||||
set(Def, VectorValue);
|
||||
} else {
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
assert(isa<VPInstruction>(Def) &&
|
||||
"Explicit BuildVector recipes must have"
|
||||
"handled packing for non-VPInstructions.");
|
||||
// Initialize packing with insertelements to start from poison.
|
||||
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
|
||||
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
|
||||
|
@ -460,6 +460,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
|
||||
case Instruction::Load:
|
||||
case VPInstruction::AnyOf:
|
||||
case VPInstruction::BranchOnCond:
|
||||
case VPInstruction::BuildStructVector:
|
||||
case VPInstruction::BuildVector:
|
||||
case VPInstruction::CalculateTripCountMinusVF:
|
||||
case VPInstruction::CanonicalIVIncrementForPart:
|
||||
case VPInstruction::ExplicitVectorLength:
|
||||
|
@ -3282,6 +3282,52 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
|
||||
BTC->replaceAllUsesWith(TCMO);
|
||||
}
|
||||
|
||||
/// Insert an explicit Build(Struct)Vector VPInstruction directly after every
/// VPReplicateRecipe that is not single-scalar and that has at least one user
/// which either does not use the recipe's scalars or whose enclosing region is
/// not the vector loop region. Exactly those users are redirected to the new
/// VPInstruction, which is created with the replicate recipe as its only
/// operand. Only blocks reached by shallow depth-first walks from the plan
/// entry and from the loop region's entry are visited. Does nothing when
/// \p Plan has only a scalar VF.
/// NOTE(review): the result of getVectorLoopRegion() is dereferenced without a
/// null check; presumably callers guarantee the region exists -- confirm.
void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
|
||||
// Nothing to pack if the plan only has a scalar VF.
if (Plan.hasScalarVFOnly())
|
||||
return;
|
||||
|
||||
VPTypeAnalysis TypeInfo(Plan);
|
||||
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
|
||||
// Two shallow traversals: one starting at the plan entry and one starting at
// the entry of the loop region.
auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
|
||||
vp_depth_first_shallow(Plan.getEntry()));
|
||||
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
|
||||
vp_depth_first_shallow(LoopRegion->getEntry()));
|
||||
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
|
||||
// excluding ones in replicate regions. Those are not materialized explicitly
|
||||
// yet. Those vector users are still handled in VPReplicateRegion::execute(),
|
||||
// via shouldPack().
|
||||
// TODO: materialize build vectors for replicating recipes in replicating
|
||||
// regions.
|
||||
// TODO: materialize build vectors for VPInstructions.
|
||||
for (VPBasicBlock *VPBB :
|
||||
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
|
||||
// Early-inc iteration: a new recipe is inserted after the current one inside
// the loop body.
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
|
||||
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
|
||||
// Selects the users that get redirected to the Build(Struct)Vector: users
// for which usesScalars(RepR) is false, or whose parent region differs from
// LoopRegion. Only invoked below after RepR has been checked for null.
auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
|
||||
VPRegionBlock *ParentRegion =
|
||||
cast<VPRecipeBase>(U)->getParent()->getParent();
|
||||
return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
|
||||
};
|
||||
// Skip anything that is not a VPReplicateRecipe, single-scalar recipes, and
// recipes with no user matching the predicate above.
if (!RepR || RepR->isSingleScalar() ||
|
||||
none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
|
||||
continue;
|
||||
|
||||
// Struct-typed results use BuildStructVector; everything else BuildVector.
Type *ScalarTy = TypeInfo.inferScalarType(RepR);
|
||||
unsigned Opcode = ScalarTy->isStructTy()
|
||||
? VPInstruction::BuildStructVector
|
||||
: VPInstruction::BuildVector;
|
||||
auto *BuildVector = new VPInstruction(Opcode, {RepR});
|
||||
BuildVector->insertAfter(RepR);
|
||||
|
||||
// Redirect the matching users, but never BuildVector itself, so that its
// operand keeps referring to RepR.
RepR->replaceUsesWithIf(
|
||||
BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
|
||||
VPUser &U, unsigned) {
|
||||
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
|
||||
VPBasicBlock *VectorPHVPBB,
|
||||
bool TailByMasking,
|
||||
|
@ -274,6 +274,10 @@ struct VPlanTransforms {
|
||||
static void materializeBackedgeTakenCount(VPlan &Plan,
|
||||
VPBasicBlock *VectorPH);
|
||||
|
||||
/// Add explicit Build[Struct]Vector recipes that combine multiple scalar
|
||||
/// values into single vectors.
|
||||
static void materializeBuildVectors(VPlan &Plan);
|
||||
|
||||
/// Materialize VF and VFxUF to be computed explicitly using VPInstructions.
|
||||
static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
|
||||
ElementCount VF);
|
||||
|
@ -464,10 +464,12 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
|
||||
VPlanTransforms::removeDeadRecipes(Plan);
|
||||
}
|
||||
|
||||
/// Create a single-scalar clone of \p RepR for lane \p Lane.
|
||||
static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
|
||||
Type *IdxTy, VPReplicateRecipe *RepR,
|
||||
VPLane Lane) {
|
||||
/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
|
||||
/// Def2LaneDefs to look up scalar definitions for operands of \p RepR.
|
||||
static VPReplicateRecipe *
|
||||
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
|
||||
VPReplicateRecipe *RepR, VPLane Lane,
|
||||
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
|
||||
// Collect the operands at Lane, creating extracts as needed.
|
||||
SmallVector<VPValue *> NewOps;
|
||||
for (VPValue *Op : RepR->operands()) {
|
||||
@ -480,6 +482,14 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
|
||||
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
|
||||
continue;
|
||||
}
|
||||
// If Op is a definition that has been unrolled, directly use the clone for
|
||||
// the corresponding lane.
|
||||
auto LaneDefs = Def2LaneDefs.find(Op);
|
||||
if (LaneDefs != Def2LaneDefs.end()) {
|
||||
NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Look through buildvector to avoid unnecessary extracts.
|
||||
if (match(Op, m_BuildVector())) {
|
||||
NewOps.push_back(
|
||||
@ -512,6 +522,13 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
|
||||
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
|
||||
auto VPBBsToUnroll =
|
||||
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
|
||||
// A mapping of current VPValue definitions to collections of new VPValues
|
||||
// defined per lane. Serves to hook-up potential users of current VPValue
|
||||
// definition that are replicated-per-VF later.
|
||||
DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
|
||||
// The removal of current recipes being replaced by new ones needs to be
|
||||
// delayed after Def2LaneDefs is no longer in use.
|
||||
SmallVector<VPRecipeBase *> ToRemove;
|
||||
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
|
||||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
|
||||
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
|
||||
@ -523,12 +540,12 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
|
||||
if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
|
||||
vputils::isSingleScalar(RepR->getOperand(1))) {
|
||||
// Stores to invariant addresses need to store the last lane only.
|
||||
cloneForLane(Plan, Builder, IdxTy, RepR,
|
||||
VPLane::getLastLaneForVF(VF));
|
||||
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
|
||||
Def2LaneDefs);
|
||||
} else {
|
||||
// Create single-scalar version of RepR for all lanes.
|
||||
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
|
||||
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I));
|
||||
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
|
||||
}
|
||||
RepR->eraseFromParent();
|
||||
continue;
|
||||
@ -536,23 +553,33 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
|
||||
/// Create single-scalar version of RepR for all lanes.
|
||||
SmallVector<VPValue *> LaneDefs;
|
||||
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
|
||||
LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));
|
||||
LaneDefs.push_back(
|
||||
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
|
||||
|
||||
Def2LaneDefs[RepR] = LaneDefs;
|
||||
/// Users that only demand the first lane can use the definition for lane
|
||||
/// 0.
|
||||
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
|
||||
return U.onlyFirstLaneUsed(RepR);
|
||||
});
|
||||
|
||||
// If needed, create a Build(Struct)Vector recipe to insert the scalar
|
||||
// lane values into a vector.
|
||||
Type *ResTy = RepR->getUnderlyingInstr()->getType();
|
||||
VPValue *VecRes = Builder.createNaryOp(
|
||||
ResTy->isStructTy() ? VPInstruction::BuildStructVector
|
||||
: VPInstruction::BuildVector,
|
||||
LaneDefs);
|
||||
RepR->replaceAllUsesWith(VecRes);
|
||||
RepR->eraseFromParent();
|
||||
// Update each build vector user that currently has RepR as its only
|
||||
// operand, to have all LaneDefs as its operands.
|
||||
for (VPUser *U : to_vector(RepR->users())) {
|
||||
auto *VPI = dyn_cast<VPInstruction>(U);
|
||||
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
|
||||
VPI->getOpcode() != VPInstruction::BuildStructVector))
|
||||
continue;
|
||||
assert(VPI->getNumOperands() == 1 &&
|
||||
"Build(Struct)Vector must have a single operand before "
|
||||
"replicating by VF");
|
||||
VPI->setOperand(0, LaneDefs[0]);
|
||||
for (VPValue *LaneDef : drop_begin(LaneDefs))
|
||||
VPI->addOperand(LaneDef);
|
||||
}
|
||||
ToRemove.push_back(RepR);
|
||||
}
|
||||
}
|
||||
for (auto *R : reverse(ToRemove))
|
||||
R->eraseFromParent();
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user