[VPlan] Factor out common cost-computation logic to a helper (NFCI). (#153361)
A number of recipes compute costs for the same opcodes, for either scalars or vectors depending on the recipe. Move the common logic out to a helper in VPRecipeWithIRFlags, which is then used by VPReplicateRecipe, VPWidenRecipe and VPInstruction. This makes it easier to cover all relevant opcodes without duplication. PR: https://github.com/llvm/llvm-project/pull/153361
This commit is contained in:
parent
f1458ec623
commit
35be64a416
@ -898,6 +898,11 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
|
||||
}
|
||||
|
||||
void execute(VPTransformState &State) override = 0;
|
||||
|
||||
/// Compute the cost of this recipe for \p VF as if it executed \p Opcode,
/// using \p Ctx. Returns std::nullopt if \p Opcode is not handled.
|
||||
std::optional<InstructionCost>
|
||||
getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF,
|
||||
VPCostContext &Ctx) const;
|
||||
};
|
||||
|
||||
/// Helper to access the operand that contains the unroll part for this recipe
|
||||
|
@ -942,28 +942,90 @@ Value *VPInstruction::generate(VPTransformState &State) {
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<InstructionCost> VPRecipeWithIRFlags::getCostForRecipeWithOpcode(
|
||||
unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const {
|
||||
Type *ScalarTy = Ctx.Types.inferScalarType(this);
|
||||
Type *ResultTy = VF.isVector() ? toVectorTy(ScalarTy, VF) : ScalarTy;
|
||||
switch (Opcode) {
|
||||
case Instruction::FNeg:
|
||||
return Ctx.TTI.getArithmeticInstrCost(Opcode, ResultTy, Ctx.CostKind);
|
||||
case Instruction::UDiv:
|
||||
case Instruction::SDiv:
|
||||
case Instruction::SRem:
|
||||
case Instruction::URem:
|
||||
case Instruction::Add:
|
||||
case Instruction::FAdd:
|
||||
case Instruction::Sub:
|
||||
case Instruction::FSub:
|
||||
case Instruction::Mul:
|
||||
case Instruction::FMul:
|
||||
case Instruction::FDiv:
|
||||
case Instruction::FRem:
|
||||
case Instruction::Shl:
|
||||
case Instruction::LShr:
|
||||
case Instruction::AShr:
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor: {
|
||||
TargetTransformInfo::OperandValueInfo RHSInfo = {
|
||||
TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
|
||||
|
||||
if (VF.isVector()) {
|
||||
// Certain instructions can be cheaper to vectorize if they have a
|
||||
// constant second vector operand. One example of this are shifts on x86.
|
||||
VPValue *RHS = getOperand(1);
|
||||
RHSInfo = Ctx.getOperandInfo(RHS);
|
||||
|
||||
if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
|
||||
getOperand(1)->isDefinedOutsideLoopRegions())
|
||||
RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
|
||||
}
|
||||
|
||||
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
|
||||
SmallVector<const Value *, 4> Operands;
|
||||
if (CtxI)
|
||||
Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
|
||||
return Ctx.TTI.getArithmeticInstrCost(
|
||||
Opcode, ResultTy, Ctx.CostKind,
|
||||
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
|
||||
RHSInfo, Operands, CtxI, &Ctx.TLI);
|
||||
}
|
||||
case Instruction::Freeze:
|
||||
// This opcode is unknown. Assume that it is the same as 'mul'.
|
||||
return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, ResultTy,
|
||||
Ctx.CostKind);
|
||||
case Instruction::ExtractValue:
|
||||
return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,
|
||||
Ctx.CostKind);
|
||||
case Instruction::ICmp:
|
||||
case Instruction::FCmp: {
|
||||
Type *ScalarOpTy = Ctx.Types.inferScalarType(getOperand(0));
|
||||
Type *OpTy = VF.isVector() ? toVectorTy(ScalarOpTy, VF) : ScalarOpTy;
|
||||
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
|
||||
return Ctx.TTI.getCmpSelInstrCost(
|
||||
Opcode, OpTy, CmpInst::makeCmpResultType(OpTy), getPredicate(),
|
||||
Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},
|
||||
{TTI::OK_AnyValue, TTI::OP_None}, CtxI);
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
InstructionCost VPInstruction::computeCost(ElementCount VF,
|
||||
VPCostContext &Ctx) const {
|
||||
if (Instruction::isBinaryOp(getOpcode())) {
|
||||
Type *ResTy = Ctx.Types.inferScalarType(this);
|
||||
if (!vputils::onlyFirstLaneUsed(this))
|
||||
ResTy = toVectorTy(ResTy, VF);
|
||||
|
||||
if (!getUnderlyingValue()) {
|
||||
switch (getOpcode()) {
|
||||
case Instruction::FMul:
|
||||
return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
|
||||
default:
|
||||
// TODO: Compute cost for VPInstructions without underlying values once
|
||||
// the legacy cost model has been retired.
|
||||
return 0;
|
||||
}
|
||||
if (!getUnderlyingValue() && getOpcode() != Instruction::FMul) {
|
||||
// TODO: Compute cost for VPInstructions without underlying values once
|
||||
// the legacy cost model has been retired.
|
||||
return 0;
|
||||
}
|
||||
|
||||
assert(!doesGeneratePerAllLanes() &&
|
||||
"Should only generate a vector value or single scalar, not scalars "
|
||||
"for all lanes.");
|
||||
return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
|
||||
return *getCostForRecipeWithOpcode(
|
||||
getOpcode(),
|
||||
vputils::onlyFirstLaneUsed(this) ? ElementCount::getFixed(1) : VF, Ctx);
|
||||
}
|
||||
|
||||
switch (getOpcode()) {
|
||||
@ -2033,20 +2095,13 @@ void VPWidenRecipe::execute(VPTransformState &State) {
|
||||
InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
|
||||
VPCostContext &Ctx) const {
|
||||
switch (Opcode) {
|
||||
case Instruction::FNeg: {
|
||||
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
|
||||
return Ctx.TTI.getArithmeticInstrCost(
|
||||
Opcode, VectorTy, Ctx.CostKind,
|
||||
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
|
||||
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
|
||||
}
|
||||
|
||||
case Instruction::UDiv:
|
||||
case Instruction::SDiv:
|
||||
case Instruction::SRem:
|
||||
case Instruction::URem:
|
||||
// More complex computation, let the legacy cost-model handle this for now.
|
||||
return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
|
||||
case Instruction::FNeg:
|
||||
case Instruction::Add:
|
||||
case Instruction::FAdd:
|
||||
case Instruction::Sub:
|
||||
@ -2060,45 +2115,12 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
|
||||
case Instruction::AShr:
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor: {
|
||||
VPValue *RHS = getOperand(1);
|
||||
// Certain instructions can be cheaper to vectorize if they have a constant
|
||||
// second vector operand. One example of this are shifts on x86.
|
||||
TargetTransformInfo::OperandValueInfo RHSInfo = Ctx.getOperandInfo(RHS);
|
||||
|
||||
if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
|
||||
getOperand(1)->isDefinedOutsideLoopRegions())
|
||||
RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
|
||||
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
|
||||
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
|
||||
|
||||
SmallVector<const Value *, 4> Operands;
|
||||
if (CtxI)
|
||||
Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
|
||||
return Ctx.TTI.getArithmeticInstrCost(
|
||||
Opcode, VectorTy, Ctx.CostKind,
|
||||
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
|
||||
RHSInfo, Operands, CtxI, &Ctx.TLI);
|
||||
}
|
||||
case Instruction::Freeze: {
|
||||
// This opcode is unknown. Assume that it is the same as 'mul'.
|
||||
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
|
||||
return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,
|
||||
Ctx.CostKind);
|
||||
}
|
||||
case Instruction::ExtractValue: {
|
||||
return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,
|
||||
Ctx.CostKind);
|
||||
}
|
||||
case Instruction::Xor:
|
||||
case Instruction::Freeze:
|
||||
case Instruction::ExtractValue:
|
||||
case Instruction::ICmp:
|
||||
case Instruction::FCmp: {
|
||||
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
|
||||
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
|
||||
return Ctx.TTI.getCmpSelInstrCost(
|
||||
Opcode, VectorTy, CmpInst::makeCmpResultType(VectorTy), getPredicate(),
|
||||
Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},
|
||||
{TTI::OK_AnyValue, TTI::OP_None}, CtxI);
|
||||
}
|
||||
case Instruction::FCmp:
|
||||
return *getCostForRecipeWithOpcode(getOpcode(), VF, Ctx);
|
||||
default:
|
||||
llvm_unreachable("Unsupported opcode for instruction");
|
||||
}
|
||||
@ -2972,7 +2994,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
|
||||
// transform, avoid computing their cost multiple times for now.
|
||||
Ctx.SkipCostComputation.insert(UI);
|
||||
|
||||
Type *ResultTy = Ctx.Types.inferScalarType(this);
|
||||
switch (UI->getOpcode()) {
|
||||
case Instruction::GetElementPtr:
|
||||
// We mark this instruction as zero-cost because the cost of GEPs in
|
||||
@ -2996,6 +3017,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
|
||||
SmallVector<Type *, 4> Tys;
|
||||
for (VPValue *ArgOp : drop_end(operands()))
|
||||
Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
|
||||
Type *ResultTy = Ctx.Types.inferScalarType(this);
|
||||
return Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
|
||||
}
|
||||
case Instruction::Add:
|
||||
@ -3012,12 +3034,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor: {
|
||||
auto Op2Info = Ctx.getOperandInfo(getOperand(1));
|
||||
SmallVector<const Value *, 4> Operands(UI->operand_values());
|
||||
return Ctx.TTI.getArithmeticInstrCost(
|
||||
UI->getOpcode(), ResultTy, Ctx.CostKind,
|
||||
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
|
||||
Op2Info, Operands, UI, &Ctx.TLI) *
|
||||
return *getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1),
|
||||
Ctx) *
|
||||
(isSingleScalar() ? 1 : VF.getFixedValue());
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user