diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 97c5b03a4b69..c4110582da1e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7306,6 +7306,16 @@ DenseMap LoopVectorizationPlanner::executePlan( VPlanTransforms::materializeVFAndVFxUF(BestVPlan, VectorPH, BestVF); VPlanTransforms::simplifyRecipes(BestVPlan); + // 0. Generate SCEV-dependent code in the entry, including TripCount, before + // making any changes to the CFG. + DenseMap ExpandedSCEVs = + VPlanTransforms::expandSCEVs(BestVPlan, *PSE.getSE()); + if (!ILV.getTripCount()) + ILV.setTripCount(BestVPlan.getTripCount()->getLiveInIRValue()); + else + assert(VectorizingEpilogue && "should only re-use the existing trip " + "count during epilogue vectorization"); + // Perform the actual loop transformation. VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan, OrigLoop->getParentLoop(), @@ -7315,30 +7325,6 @@ DenseMap LoopVectorizationPlanner::executePlan( assert(DT->verify(DominatorTree::VerificationLevel::Fast)); #endif - // 0. Generate SCEV-dependent code in the entry, including TripCount, before - // making any changes to the CFG. - DenseMap ExpandedSCEVs; - auto *Entry = cast(BestVPlan.getEntry()); - State.Builder.SetInsertPoint(Entry->getIRBasicBlock()->getTerminator()); - for (VPRecipeBase &R : make_early_inc_range(*Entry)) { - auto *ExpSCEV = dyn_cast(&R); - if (!ExpSCEV) - continue; - ExpSCEV->execute(State); - ExpandedSCEVs[ExpSCEV->getSCEV()] = State.get(ExpSCEV, VPLane(0)); - VPValue *Exp = BestVPlan.getOrAddLiveIn(ExpandedSCEVs[ExpSCEV->getSCEV()]); - ExpSCEV->replaceAllUsesWith(Exp); - if (BestVPlan.getTripCount() == ExpSCEV) - BestVPlan.resetTripCount(Exp); - ExpSCEV->eraseFromParent(); - } - - if (!ILV.getTripCount()) - ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0))); - else - assert(VectorizingEpilogue && "should only re-use the existing trip " - "count during epilogue vectorization"); - // 1. Set up the skeleton for vectorization, including vector pre-header and // middle block. The vector loop is created during VPlan execution. BasicBlock *EntryBB = @@ -7776,7 +7762,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc, "step must be loop invariant"); VPValue *Step = - vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep(), SE); + vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep()); if (auto *TruncI = dyn_cast(PhiOrTrunc)) { return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(), IndDesc, TruncI, @@ -7798,8 +7784,7 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI( // Check if this is pointer induction. If so, build the recipe for it. if (auto *II = Legal->getPointerInductionDescriptor(Phi)) { - VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep(), - *PSE.getSE()); + VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep()); return new VPWidenPointerInductionRecipe( Phi, Operands[0], Step, &Plan.getVFxUF(), *II, LoopVectorizationPlanner::getDecisionAndClampRange( @@ -8957,7 +8942,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { [this](PHINode *P) { return Legal->getIntOrFpInductionDescriptor(P); }, - *PSE.getSE(), *TLI)) + *TLI)) return nullptr; // Collect mapping of IR header phis to header phi recipes, to be used in diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index ce985322d9ca..6814dc5de671 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3253,22 +3253,20 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe { /// Recipe to expand a SCEV expression. class VPExpandSCEVRecipe : public VPSingleDefRecipe { const SCEV *Expr; - ScalarEvolution &SE; public: - VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE) - : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {} + VPExpandSCEVRecipe(const SCEV *Expr) + : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {} ~VPExpandSCEVRecipe() override = default; - VPExpandSCEVRecipe *clone() override { - return new VPExpandSCEVRecipe(Expr, SE); - } + VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); } VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC) - /// Generate a canonical vector induction variable of the vector loop, with - void execute(VPTransformState &State) override; + void execute(VPTransformState &State) override { + llvm_unreachable("SCEV expressions must be expanded before final execute"); + } /// Return the cost of this VPExpandSCEVRecipe. InstructionCost computeCost(ElementCount VF, diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index b231a8429503..292ba64473d4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -487,8 +487,7 @@ static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, ScalarEvolution &SE = *PSE.getSE(); const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, InductionTy, TheLoop); - Plan.setTripCount( - vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE)); + Plan.setTripCount(vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount)); VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph"); VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 2644935494ab..a92540f457c7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -37,7 +37,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" -#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include using namespace llvm; @@ -3803,18 +3802,7 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, getOperand(4)->printAsOperand(O, SlotTracker); } } -#endif -void VPExpandSCEVRecipe::execute(VPTransformState &State) { - assert(!State.Lane && "cannot be used in per-lane"); - const DataLayout &DL = SE.getDataLayout(); - SCEVExpander Exp(SE, DL, "induction", /*PreserveLCSSA=*/true); - Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), - &*State.Builder.GetInsertPoint()); - State.set(this, Res, VPLane(0)); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "EMIT "; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 504dd9a7c22c..74add48be7c8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/Support/Casting.h" #include "llvm/Support/TypeSize.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" using namespace llvm; using namespace VPlanPatternMatch; @@ -42,7 +43,7 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( VPlanPtr &Plan, function_ref GetIntOrFpInductionDescriptor, - ScalarEvolution &SE, const TargetLibraryInfo &TLI) { + const TargetLibraryInfo &TLI) { ReversePostOrderTraversal> RPOT( Plan->getVectorLoopRegion()); @@ -73,7 +74,7 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( } else { VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue()); VPValue *Step = - vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE); + vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep()); NewRecipe = new VPWidenIntOrFpInductionRecipe( Phi, Start, Step, &Plan->getVF(), *II, Ingredient.getDebugLoc()); } @@ -3468,6 +3469,33 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, VFxUF.replaceAllUsesWith(MulByUF); } +DenseMap +VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) { + const DataLayout &DL = SE.getDataLayout(); + SCEVExpander Expander(SE, DL, "induction", /*PreserveLCSSA=*/true); + + auto *Entry = cast(Plan.getEntry()); + BasicBlock *EntryBB = Entry->getIRBasicBlock(); + DenseMap ExpandedSCEVs; + for (VPRecipeBase &R : make_early_inc_range(*Entry)) { + if (isa(&R)) + continue; + auto *ExpSCEV = dyn_cast(&R); + if (!ExpSCEV) + break; + const SCEV *Expr = ExpSCEV->getSCEV(); + Value *Res = + Expander.expandCodeFor(Expr, Expr->getType(), EntryBB->getTerminator()); + ExpandedSCEVs[ExpSCEV->getSCEV()] = Res; + VPValue *Exp = Plan.getOrAddLiveIn(Res); + ExpSCEV->replaceAllUsesWith(Exp); + if (Plan.getTripCount() == ExpSCEV) + Plan.resetTripCount(Exp); + ExpSCEV->eraseFromParent(); + } + return ExpandedSCEVs; +} + /// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be /// converted to a narrower recipe. \p V is used by a wide recipe that feeds a /// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 5b3d18b237ef..b1d986892ca6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -100,7 +100,7 @@ struct VPlanTransforms { VPlanPtr &Plan, function_ref GetIntOrFpInductionDescriptor, - ScalarEvolution &SE, const TargetLibraryInfo &TLI); + const TargetLibraryInfo &TLI); /// Try to have all users of fixed-order recurrences appear after the recipe /// defining their previous value, by either sinking users or hoisting recipes @@ -282,6 +282,13 @@ struct VPlanTransforms { static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, ElementCount VF); + /// Expand VPExpandSCEVRecipes in \p Plan's entry block. Each + /// VPExpandSCEVRecipe is replaced with a live-in wrapping the expanded IR + /// value. A mapping from SCEV expressions to their expanded IR value is + /// returned. + static DenseMap expandSCEVs(VPlan &Plan, + ScalarEvolution &SE); + /// Try to convert a plan with interleave groups with VF elements to a plan /// with the interleave groups replaced by wide loads and stores processing VF /// elements, if all transformed interleave groups access the full vector diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index b2230c491d73..700a733bf9f2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -29,8 +29,7 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) { [Def](const VPUser *U) { return U->usesScalars(Def); }); } -VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, - ScalarEvolution &SE) { +VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { if (auto *Expanded = Plan.getSCEVExpansion(Expr)) return Expanded; VPValue *Expanded = nullptr; @@ -45,7 +44,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, if (U && !isa(U->getValue())) { Expanded = Plan.getOrAddLiveIn(U->getValue()); } else { - Expanded = new VPExpandSCEVRecipe(Expr, SE); + Expanded = new VPExpandSCEVRecipe(Expr); Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe()); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 3cf02b638bbb..9e1d325a4d8d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -33,8 +33,7 @@ bool onlyScalarValuesUsed(const VPValue *Def); /// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's /// pre-header already contains a recipe expanding \p Expr, return it. If not, /// create a new one. -VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, - ScalarEvolution &SE); +VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr); /// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no /// SCEV expression could be constructed. diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index e74393975401..a943e7ac12b1 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -203,7 +203,7 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) { VPInstruction::BranchOnCond, {Plan->getOrAddLiveIn(ConstantInt::getTrue(F->getContext()))})); VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( - Plan, [](PHINode *P) { return nullptr; }, *SE, TLI); + Plan, [](PHINode *P) { return nullptr; }, TLI); VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock(); EXPECT_EQ(0u, Entry->getNumPredecessors());