[LV] Fix bug in setVectorizedCallDecision (#175742)
There is a bug in this logic:
```
InstructionCost Cost = ScalarCost;
InstWidening Decision = CM_Scalarize;
if (VectorCost <= Cost) {
Cost = VectorCost;
Decision = CM_VectorCall;
}
if (IntrinsicCost <= Cost) {
Cost = IntrinsicCost;
Decision = CM_IntrinsicCall;
}
```
because it assumes that the comparisons behave sensibly in the face of
invalid costs. Unfortunately, PR #174835 exposes an issue when
attempting to vectorise the new test
uadd_with_overflow_i32 for AArch64 targets. Specifically, there are
situations where all costs are invalid (e.g. VF=vscale x 1), but some
costs are more invalid than others. For example, when querying the
intrinsic cost via the TTI hook we get an invalid cost with a non-zero
value, whereas the vector cost is invalid with a zero value. That leads
to us erroneously choosing CM_VectorCall as the call widening decision,
despite the lack of a vector math variant. Inevitably this causes
crashes because we create a VPCallWidenRecipe without a variant
function.
Fix this by only performing comparisons if the costs are valid. It now
leads to us choosing CM_Scalarize more often, but it's a coin toss
anyway between CM_Scalarize and CM_IntrinsicCall when both strategies
are invalid. Potentially we could also create a new strategy called
CM_Invalid, and avoid the creation of VPlans entirely.
This commit is contained in:
parent
f51eca20cf
commit
48ce7bb038
@ -6021,12 +6021,12 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
|
||||
InstructionCost Cost = ScalarCost;
|
||||
InstWidening Decision = CM_Scalarize;
|
||||
|
||||
if (VectorCost <= Cost) {
|
||||
if (VectorCost.isValid() && VectorCost <= Cost) {
|
||||
Cost = VectorCost;
|
||||
Decision = CM_VectorCall;
|
||||
}
|
||||
|
||||
if (IntrinsicCost <= Cost) {
|
||||
if (IntrinsicCost.isValid() && IntrinsicCost <= Cost) {
|
||||
Cost = IntrinsicCost;
|
||||
Decision = CM_IntrinsicCall;
|
||||
}
|
||||
|
||||
@ -9,16 +9,16 @@
|
||||
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
|
||||
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
|
||||
|
||||
; CHECK-COST-ARMPL-LABEL: sincos_f32
|
||||
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
|
||||
; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
|
||||
define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
|
||||
@ -85,13 +85,13 @@ exit:
|
||||
; CHECK-COST-LABEL: sincos_f64
|
||||
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val)
|
||||
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f64(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincos.f64(ir<%in_val>)
|
||||
|
||||
; CHECK-COST-ARMPL-LABEL: sincos_f64
|
||||
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val)
|
||||
; CHECK-COST-ARMPL: Cost of 12 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f64(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
|
||||
define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
|
||||
@ -159,16 +159,16 @@ exit:
|
||||
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
|
||||
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
|
||||
|
||||
; CHECK-COST-ARMPL-LABEL: predicated_sincos
|
||||
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
|
||||
; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
|
||||
|
||||
define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
|
||||
@ -231,16 +231,16 @@ for.end:
|
||||
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.modf.f32(float %in_val)
|
||||
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.modf.f32(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.modf.f32(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @llvm.modf.f32(ir<%in_val>)
|
||||
|
||||
; CHECK-COST-ARMPL-LABEL: modf_f32
|
||||
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.modf.f32(float %in_val)
|
||||
; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of 11 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.modf.f32(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.modf.f32(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of 12 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
|
||||
define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
|
||||
@ -307,13 +307,13 @@ exit:
|
||||
; CHECK-COST-LABEL: modf_f64
|
||||
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.modf.f64(double %in_val)
|
||||
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.modf.f64(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.modf.f64(ir<%in_val>)
|
||||
|
||||
; CHECK-COST-ARMPL-LABEL: modf_f64
|
||||
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.modf.f64(double %in_val)
|
||||
; CHECK-COST-ARMPL: Cost of 11 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.modf.f64(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of 12 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
|
||||
|
||||
define void @modf_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
|
||||
@ -381,16 +381,16 @@ exit:
|
||||
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincospi.f32(float %in_val)
|
||||
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincospi.f32(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincospi.f32(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @llvm.sincospi.f32(ir<%in_val>)
|
||||
|
||||
; CHECK-COST-ARMPL-LABEL: sincospi_f32
|
||||
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincospi.f32(float %in_val)
|
||||
; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincospi.f32(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincospi.f32(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
|
||||
define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
|
||||
@ -457,13 +457,13 @@ exit:
|
||||
; CHECK-COST-LABEL: sincospi_f64
|
||||
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincospi.f64(double %in_val)
|
||||
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincospi.f64(ir<%in_val>)
|
||||
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincospi.f64(ir<%in_val>)
|
||||
|
||||
; CHECK-COST-ARMPL-LABEL: sincospi_f64
|
||||
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincospi.f64(double %in_val)
|
||||
; CHECK-COST-ARMPL: Cost of 12 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincospi.f64(ir<%in_val>)
|
||||
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
|
||||
|
||||
define void @sincospi_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user