[LV] Fix bug in setVectorizedCallDecision (#175742)

There is a bug in this logic:

```
   InstructionCost Cost = ScalarCost;
   InstWidening Decision = CM_Scalarize;

   if (VectorCost <= Cost) {
     Cost = VectorCost;
     Decision = CM_VectorCall;
   }

   if (IntrinsicCost <= Cost) {
     Cost = IntrinsicCost;
     Decision = CM_IntrinsicCall;
   }
```

because it assumes that the comparisons behave sensibly in the face of
invalid costs. Unfortunately, PR #174835 exposes an issue when
attempting to vectorise the new test
uadd_with_overflow_i32 for AArch64 targets. Specifically, there are
situations where all costs are invalid (e.g. VF=vscale x 1), but some
costs are more invalid than others. For example, when querying the
intrinsic cost via the TTI hook we get an invalid cost with a non-zero
value, whereas the vector cost is invalid with a zero value. That leads
to us erroneously choosing CM_VectorCall as the call widening decision,
despite the lack of a vector math variant. Inevitably this causes
crashes because we create a VPCallWidenRecipe without a variant
function.

Fix this by only performing comparisons if the costs are valid. It now
leads to us choosing CM_Scalarize more often, but it's a toin coss
anyway between CM_Scalarize and CM_IntrinsicCall when both strategies
are invalid. Potentially we could also create a new strategy called
CM_Invalid, and avoid the creation of VPlans entirely.
This commit is contained in:
David Sherwood 2026-01-14 07:28:38 +00:00 committed by GitHub
parent f51eca20cf
commit 48ce7bb038
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 31 additions and 31 deletions

View File

@ -6021,12 +6021,12 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
InstructionCost Cost = ScalarCost;
InstWidening Decision = CM_Scalarize;
if (VectorCost <= Cost) {
if (VectorCost.isValid() && VectorCost <= Cost) {
Cost = VectorCost;
Decision = CM_VectorCall;
}
if (IntrinsicCost <= Cost) {
if (IntrinsicCost.isValid() && IntrinsicCost <= Cost) {
Cost = IntrinsicCost;
Decision = CM_IntrinsicCall;
}

View File

@ -9,16 +9,16 @@
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
; CHECK-COST-ARMPL-LABEL: sincos_f32
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@ -85,13 +85,13 @@ exit:
; CHECK-COST-LABEL: sincos_f64
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val)
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f64(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincos.f64(ir<%in_val>)
; CHECK-COST-ARMPL-LABEL: sincos_f64
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val)
; CHECK-COST-ARMPL: Cost of 12 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f64(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@ -159,16 +159,16 @@ exit:
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
; CHECK-COST-ARMPL-LABEL: predicated_sincos
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincos.f32(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@ -231,16 +231,16 @@ for.end:
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.modf.f32(float %in_val)
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.modf.f32(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.modf.f32(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @llvm.modf.f32(ir<%in_val>)
; CHECK-COST-ARMPL-LABEL: modf_f32
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.modf.f32(float %in_val)
; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 11 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.modf.f32(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.modf.f32(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 12 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@ -307,13 +307,13 @@ exit:
; CHECK-COST-LABEL: modf_f64
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.modf.f64(double %in_val)
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.modf.f64(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.modf.f64(ir<%in_val>)
; CHECK-COST-ARMPL-LABEL: modf_f64
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.modf.f64(double %in_val)
; CHECK-COST-ARMPL: Cost of 11 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.modf.f64(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 12 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>)
define void @modf_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@ -381,16 +381,16 @@ exit:
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincospi.f32(float %in_val)
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincospi.f32(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincospi.f32(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @llvm.sincospi.f32(ir<%in_val>)
; CHECK-COST-ARMPL-LABEL: sincospi_f32
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincospi.f32(float %in_val)
; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincospi.f32(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincospi.f32(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@ -457,13 +457,13 @@ exit:
; CHECK-COST-LABEL: sincospi_f64
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincospi.f64(double %in_val)
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincospi.f64(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @llvm.sincospi.f64(ir<%in_val>)
; CHECK-COST-ARMPL-LABEL: sincospi_f64
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincospi.f64(double %in_val)
; CHECK-COST-ARMPL: Cost of 12 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sincospi.f64(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>)
define void @sincospi_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {