
A lot of the time, getCanonicalIV() is used only to get the canonical IV's type, e.g. to instantiate a VPTypeAnalysis or to get the LLVMContext. However, VPTypeAnalysis has a constructor that takes the VPlan directly, and VPlan has a method to get the LLVMContext directly, so use those instead where possible. This lets us remove a constructor on VPTypeAnalysis. Also remove an unused LLVMContext argument from UnrollState whilst we're here.
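To illustrate the call-site simplification, a minimal before/after sketch (the removed Type*-taking VPTypeAnalysis constructor is assumed from the description above):

    // Before: reach through the canonical IV just for its type or context.
    VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
    LLVMContext &Ctx = Plan.getCanonicalIV()->getScalarType()->getContext();

    // After: ask the VPlan directly.
    VPTypeAnalysis TypeInfo(Plan);
    LLVMContext &Ctx = Plan.getContext();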
//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanAnalysis.h"
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;

#define DEBUG_TYPE "vplan"

VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) : Ctx(Plan.getContext()) {
  if (auto *LoopRegion = Plan.getVectorLoopRegion()) {
    if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
            &LoopRegion->getEntryBasicBlock()->front())) {
      CanonicalIVTy = CanIV->getScalarType();
      return;
    }
  }

  // If there's no canonical IV, retrieve the type from the trip count
  // expression.
  auto *TC = Plan.getTripCount();
  if (TC->isLiveIn()) {
    CanonicalIVTy = TC->getLiveInIRValue()->getType();
    return;
  }
  CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
  Type *ResTy = inferScalarType(R->getIncomingValue(0));
  for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
    VPValue *Inc = R->getIncomingValue(I);
    assert(inferScalarType(Inc) == ResTy &&
           "different types inferred for different incoming values");
    CachedTypes[Inc] = ResTy;
  }
  return ResTy;
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
  // Set the result type from the first operand, check if the types for all
  // other operands match and cache them.
  auto SetResultTyFromOp = [this, R]() {
    Type *ResTy = inferScalarType(R->getOperand(0));
    for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) {
      VPValue *OtherV = R->getOperand(Op);
      assert(inferScalarType(OtherV) == ResTy &&
             "different types inferred for different operands");
      CachedTypes[OtherV] = ResTy;
    }
    return ResTy;
  };

  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode))
    return SetResultTyFromOp();

  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case VPInstruction::ReductionStartVector:
  case VPInstruction::ResumeForEpilogue:
    return inferScalarType(R->getOperand(0));
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    VPValue *OtherV = R->getOperand(2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
  case VPInstruction::ActiveLaneMask:
    assert(inferScalarType(R->getOperand(0)) ==
               inferScalarType(R->getOperand(1)) &&
           "different types inferred for different operands");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::ComputeAnyOfResult:
    return inferScalarType(R->getOperand(1));
  case VPInstruction::ComputeFindIVResult:
  case VPInstruction::ComputeReductionResult: {
    return inferScalarType(R->getOperand(0));
  }
  case VPInstruction::ExplicitVectorLength:
    return Type::getIntNTy(Ctx, 32);
  case Instruction::PHI:
    // Infer the type of first operand only, as other operands of header phis
    // may lead to infinite recursion.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::Not:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::AnyOf:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
    return SetResultTyFromOp();
  case VPInstruction::ExtractLane:
    return inferScalarType(R->getOperand(1));
  case VPInstruction::FirstActiveLane:
    return Type::getIntNTy(Ctx, 64);
  case VPInstruction::ExtractLastElement:
  case VPInstruction::ExtractPenultimateElement: {
    Type *BaseTy = inferScalarType(R->getOperand(0));
    if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
      return VecTy->getElementType();
    return BaseTy;
  }
  case VPInstruction::LogicalAnd:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
           inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
           "LogicalAnd operands should be bool");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::Broadcast:
  case VPInstruction::PtrAdd:
  case VPInstruction::WidePtrAdd:
    // Return the type based on first operand.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "types for both operands must match for binary op");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  switch (Opcode) {
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::FNeg:
  case Instruction::Freeze:
    return inferScalarType(R->getOperand(0));
  case Instruction::ExtractValue: {
    assert(R->getNumOperands() == 2 && "expected single level extractvalue");
    auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
    auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue());
    return StructTy->getTypeAtIndex(CI->getZExtValue());
  }
  default:
    break;
  }

  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
  auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
  return CI.getType();
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
  assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
         "Store recipes should not define any values");
  return cast<LoadInst>(&R->getIngredient())->getType();
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) {
  Type *ResTy = inferScalarType(R->getOperand(1));
  VPValue *OtherV = R->getOperand(2);
  assert(inferScalarType(OtherV) == ResTy &&
         "different types inferred for different operands");
  CachedTypes[OtherV] = ResTy;
  return ResTy;
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
  unsigned Opcode = R->getUnderlyingInstr()->getOpcode();

  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "inferred types for operands of binary op don't match");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  if (Instruction::isCast(Opcode))
    return R->getUnderlyingInstr()->getType();

  switch (Opcode) {
  case Instruction::Call: {
    unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);
    return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    assert(ResTy == inferScalarType(R->getOperand(2)) &&
           "inferred types for operands of select op don't match");
    CachedTypes[R->getOperand(2)] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::Alloca:
  case Instruction::ExtractValue:
    return R->getUnderlyingInstr()->getType();
  case Instruction::Freeze:
  case Instruction::FNeg:
  case Instruction::GetElementPtr:
    return inferScalarType(R->getOperand(0));
  case Instruction::Load:
    return cast<LoadInst>(R->getUnderlyingInstr())->getType();
  case Instruction::Store:
    // FIXME: VPReplicateRecipes with store opcodes still define a result
    // VPValue, so we need to handle them here. Remove the code here once this
    // is modeled accurately in VPlan.
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode");
}

Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
  if (Type *CachedTy = CachedTypes.lookup(V))
    return CachedTy;

  if (V->isLiveIn()) {
    if (auto *IRValue = V->getLiveInIRValue())
      return IRValue->getType();
    // All VPValues without any underlying IR value (like the vector trip count
    // or the backedge-taken count) have the same type as the canonical IV.
    return CanonicalIVTy;
  }

  Type *ResultTy =
      TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
          .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
                VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
                VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
              [this](const auto *R) {
                // Handle header phi recipes, except VPWidenIntOrFpInduction
                // which needs special handling due to it possibly being
                // truncated.
                // TODO: consider inferring/caching type of siblings, e.g.,
                // backedge value, here and in cases below.
                return inferScalarType(R->getStartValue());
              })
          .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
              [](const auto *R) { return R->getScalarType(); })
          .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
                VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
                VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe,
                VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
            return inferScalarType(R->getOperand(0));
          })
          // VPInstructionWithType must be handled before VPInstruction.
          .Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
                VPWidenCastRecipe>(
              [](const auto *R) { return R->getResultType(); })
          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
                VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
              [this](const auto *R) { return inferScalarTypeForRecipe(R); })
          .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
            // TODO: Use info from interleave group.
            return V->getUnderlyingValue()->getType();
          })
          .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
            return R->getSCEV()->getType();
          })
          .Case<VPReductionRecipe>([this](const auto *R) {
            return inferScalarType(R->getChainOp());
          })
          .Case<VPExpressionRecipe>([this](const auto *R) {
            return inferScalarType(R->getOperandOfResultType());
          });

  assert(ResultTy && "could not infer type for the given VPValue");
  CachedTypes[V] = ResultTy;
  return ResultTy;
}
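
// Minimal usage sketch (hypothetical caller): construct the analysis directly
// from a VPlan (no detour via getCanonicalIV()) and query scalar types of
// VPValues; results are memoized in CachedTypes across queries.
//   VPTypeAnalysis TypeInfo(Plan);
//   Type *Ty = TypeInfo.inferScalarType(SomeVPValue);
//   LLVMContext &C = Plan.getContext();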

void llvm::collectEphemeralRecipesForVPlan(
    VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
  // First, collect seed recipes which are operands of assumes.
  SmallVector<VPRecipeBase *> Worklist;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
    for (VPRecipeBase &R : *VPBB) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || !match(RepR->getUnderlyingInstr(),
                          PatternMatch::m_Intrinsic<Intrinsic::assume>()))
        continue;
      Worklist.push_back(RepR);
      EphRecipes.insert(RepR);
    }
  }

  // Process operands of candidates in worklist and add them to the set of
  // ephemeral recipes, if they don't have side-effects and are only used by
  // other ephemeral recipes.
  while (!Worklist.empty()) {
    VPRecipeBase *Cur = Worklist.pop_back_val();
    for (VPValue *Op : Cur->operands()) {
      auto *OpR = Op->getDefiningRecipe();
      if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))
        continue;
      if (any_of(Op->users(), [EphRecipes](VPUser *U) {
            auto *UR = dyn_cast<VPRecipeBase>(U);
            return !UR || !EphRecipes.contains(UR);
          }))
        continue;
      EphRecipes.insert(OpR);
      Worklist.push_back(OpR);
    }
  }
}

template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
    DominatorTreeBase<VPBlockBase, false> &DT);

bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
                                        const VPRecipeBase *B) {
  if (A == B)
    return false;

  auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
    for (auto &R : *A->getParent()) {
      if (&R == A)
        return true;
      if (&R == B)
        return false;
    }
    llvm_unreachable("recipe not found");
  };
  const VPBlockBase *ParentA = A->getParent();
  const VPBlockBase *ParentB = B->getParent();
  if (ParentA == ParentB)
    return LocalComesBefore(A, B);

#ifndef NDEBUG
  auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
    auto *Region =
        dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
    if (Region && Region->isReplicator()) {
      assert(Region->getNumSuccessors() == 1 &&
             Region->getNumPredecessors() == 1 && "Expected SESE region!");
      assert(R->getParent()->size() == 1 &&
             "A recipe in an original replicator region must be the only "
             "recipe in its block");
      return Region;
    }
    return nullptr;
  };
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
         "No replicate regions expected at this point");
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
         "No replicate regions expected at this point");
#endif
  return Base::properlyDominates(ParentA, ParentB);
}

/// Get the VF scaling factor applied to the recipe's output, if the recipe
/// has one.
static unsigned getVFScaleFactor(VPRecipeBase *R) {
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
    return RR->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}
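
// E.g. (a sketch): a partial reduction with a VF scale factor of 4 at VF=8
// defines a value with only 8/4 = 2 lanes; the divideCoefficientBy() call in
// calculateRegisterUsageForPlan below accounts for this when computing
// register usage.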

bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI,
                                        unsigned OverrideMaxNumRegs) const {
  return any_of(MaxLocalUsers, [&TTI, &OverrideMaxNumRegs](auto &LU) {
    return LU.second > (OverrideMaxNumRegs > 0
                            ? OverrideMaxNumRegs
                            : TTI.getNumberOfRegisters(LU.first));
  });
}

SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
    VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
    const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
  // Each 'key' in the map opens a new interval. The values of the map are the
  // index of the 'last seen' usage of the recipe that is the key.
  using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>;

  // Maps indices to recipes.
  SmallVector<VPRecipeBase *, 64> Idx2Recipe;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the list of recipe indices that are used in the loop.
  SmallPtrSet<VPRecipeBase *, 8> Ends;
  // Saves the list of values that are used in the loop but are defined
  // outside the loop (not including non-recipe values such as arguments and
  // constants).
  SmallSetVector<VPValue *, 8> LoopInvariants;
  LoopInvariants.insert(&Plan.getVectorTripCount());

  // We scan the loop in topological order and assign a number to each recipe.
  // We use RPO to ensure that defs are met before their users. We assume that
  // each recipe that has in-loop users starts an interval. We record every
  // time that an in-loop value is used, so we have a list of the first and
  // last occurrences of each recipe.
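  // Worked example (a sketch, recipe names hypothetical): for recipes r0, r1,
  // r2 at indices 0, 1, 2, where r2 is the last user of r0, EndPoint[r0] ends
  // up as 3 (the size of Idx2Recipe just after r2 is pushed). In the linear
  // scan further below, r0 is thus only erased from the set of open intervals
  // once index 3 is reached, keeping it live while its last user is
  // processed.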
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getVectorLoopRegion());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    if (!VPBB->getParent())
      break;
    for (VPRecipeBase &R : *VPBB) {
      Idx2Recipe.push_back(&R);

      // Save the end location of each USE.
      for (VPValue *U : R.operands()) {
        auto *DefR = U->getDefiningRecipe();

        // Ignore non-recipe values such as arguments, constants, etc.
        // FIXME: Might need some motivation why these values are ignored. If
        // for example an argument is used inside the loop it will increase the
        // register pressure (so shouldn't we add it to LoopInvariants).
        if (!DefR && (!U->getLiveInIRValue() ||
                      !isa<Instruction>(U->getLiveInIRValue())))
          continue;

        // If this recipe is outside the loop then record it and continue.
        if (!DefR) {
          LoopInvariants.insert(U);
          continue;
        }

        // Overwrite previous end points.
        EndPoint[DefR] = Idx2Recipe.size();
        Ends.insert(DefR);
      }
    }
    if (VPBB == Plan.getVectorLoopRegion()->getExiting()) {
      // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
      // exiting block, where their increment will get materialized eventually.
      for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
        if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
          EndPoint[&R] = Idx2Recipe.size();
          Ends.insert(&R);
        }
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using RecipeList = SmallVector<VPRecipeBase *, 2>;
  SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;

  // Next, we transpose the EndPoints into a multi map that holds the list of
  // intervals that *end* at a specific location.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Interval.first);

  SmallPtrSet<VPRecipeBase *, 8> OpenIntervals;
  SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());

  LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  VPTypeAnalysis TypeInfo(Plan);

  const auto &TTICapture = TTI;
  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
    if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
        (VF.isScalable() &&
         !TTICapture.isElementTypeLegalForScalableVector(Ty)))
      return 0;
    return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
  };

  // We scan the recipes linearly and record each time that a new interval
  // starts, by placing it in a set. If we find this value in TransposeEnds
  // then we remove it from the set. The max register usage is the maximum
  // register usage of the recipes of the set.
  for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
    VPRecipeBase *R = Idx2Recipe[Idx];

    // Remove all of the recipes that end at this location.
    RecipeList &List = TransposeEnds[Idx];
    for (VPRecipeBase *ToRemove : List)
      OpenIntervals.erase(ToRemove);

    // Ignore recipes that are never used within the loop and do not have side
    // effects.
    if (!Ends.count(R) && !R->mayHaveSideEffects())
      continue;

    // Skip recipes for ignored values.
    // TODO: Should mark recipes for ephemeral values that cannot be removed
    // explicitly in VPlan.
    if (isa<VPSingleDefRecipe>(R) &&
        ValuesToIgnore.contains(
            cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
      // Count the number of registers used, per register class, given all open
      // intervals.
      // Note that elements in this SmallMapVector will be default constructed
      // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
      // there is no previous entry for ClassID.
      SmallMapVector<unsigned, unsigned, 4> RegUsage;

      for (auto *R : OpenIntervals) {
        // Skip recipes that weren't present in the original loop.
        // TODO: Remove after removing the legacy
        // LoopVectorizationCostModel::calculateRegisterUsage
        if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
                VPBranchOnMaskRecipe>(R))
          continue;

        if (VFs[J].isScalar() ||
            isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
                VPScalarIVStepsRecipe>(R) ||
            (isa<VPInstruction>(R) &&
             all_of(cast<VPSingleDefRecipe>(R)->users(),
                    [&](VPUser *U) {
                      return cast<VPRecipeBase>(U)->usesScalars(
                          R->getVPSingleValue());
                    })) ||
            (isa<VPReductionPHIRecipe>(R) &&
             (cast<VPReductionPHIRecipe>(R))->isInLoop())) {
          unsigned ClassID = TTI.getRegisterClassForType(
              false, TypeInfo.inferScalarType(R->getVPSingleValue()));
          // FIXME: The target might use more than one register for the type
          // even in the scalar case.
          RegUsage[ClassID] += 1;
        } else {
          // The output from scaled phis and scaled reductions actually has
          // fewer lanes than the VF.
          unsigned ScaleFactor = getVFScaleFactor(R);
          ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
          LLVM_DEBUG(if (VF != VFs[J]) {
            dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
                   << " for " << *R << "\n";
          });

          for (VPValue *DefV : R->definedValues()) {
            Type *ScalarTy = TypeInfo.inferScalarType(DefV);
            unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
            RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
          }
        }
      }

      for (const auto &Pair : RegUsage) {
        auto &Entry = MaxUsages[J][Pair.first];
        Entry = std::max(Entry, Pair.second);
      }
    }

    LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
                      << OpenIntervals.size() << '\n');

    // Add the current recipe to the list of open intervals.
    OpenIntervals.insert(R);
  }

  // We also search for instructions that are defined outside the loop, but are
  // used inside the loop. We need this number separately from the max-interval
  // usage number because when we unroll, loop-invariant values do not take
  // more registers.
  VPRegisterUsage RU;
  for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
    // Note that elements in this SmallMapVector will be default constructed
    // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
    // there is no previous entry for ClassID.
    SmallMapVector<unsigned, unsigned, 4> Invariant;

    for (auto *In : LoopInvariants) {
      // FIXME: The target might use more than one register for the type
      // even in the scalar case.
      bool IsScalar = all_of(In->users(), [&](VPUser *U) {
        return cast<VPRecipeBase>(U)->usesScalars(In);
      });

      ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
      unsigned ClassID = TTI.getRegisterClassForType(
          VF.isVector(), TypeInfo.inferScalarType(In));
      Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);
    }

    LLVM_DEBUG({
      dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
      dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
             << " item\n";
      for (const auto &pair : MaxUsages[Idx]) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
      dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
             << " item\n";
      for (const auto &pair : Invariant) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
    });

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[Idx];
    RUs[Idx] = RU;
  }

  return RUs;
}