Ryan Buchner f180d4bb46
[SLP] Report the correct operand to getArithmeticInstrCost() when duplicated scalars (#174442)
Previously, we selected the wrong operand when `Scalars` contained
duplicate values. This stems from #135797.

Using:
`opt -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v t.ll`
```
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64"

; Reproducer: eight i32 stores to %A at byte offsets 0 through 28.
; Each stored value is a load from %B at the same offset plus 1, except the
; store at offset 12, which reuses %add (the value already stored at offset 0).
define void @foo(ptr noalias %A, ptr noalias %B) {
entry:
  %0 = load i32, ptr %B
  %add = add nsw i32 %0, 1
  store i32 %add, ptr %A
  %arrayidx.1 = getelementptr inbounds nuw i8, ptr %B, i64 4
  %1 = load i32, ptr %arrayidx.1
  %add.1 = add nsw i32 %1, 1
  %arrayidx2.1 = getelementptr inbounds nuw i8, ptr %A, i64 4
  store i32 %add.1, ptr %arrayidx2.1
  %arrayidx.2 = getelementptr inbounds nuw i8, ptr %B, i64 8
  %2 = load i32, ptr %arrayidx.2
  %add.2 = add nsw i32 %2, 1
  %arrayidx2.2 = getelementptr inbounds nuw i8, ptr %A, i64 8
  store i32 %add.2, ptr %arrayidx2.2
  ; %arrayidx.3 is computed but never loaded from in this function.
  %arrayidx.3 = getelementptr inbounds nuw i8, ptr %B, i64 12

  %arrayidx2.3 = getelementptr inbounds nuw i8, ptr %A, i64 12

  ; Duplicated scalar: %add is stored a second time here.
  store i32 %add, ptr %arrayidx2.3
  %arrayidx.4 = getelementptr inbounds nuw i8, ptr %B, i64 16
  ; Unnamed value %3 is never defined; numbering jumps from %2 to %4.
  ; NOTE(review): the traces in this message appear to use renumbered values
  ; (this load shows up there as %3) -- confirm when comparing.
  %4 = load i32, ptr %arrayidx.4
  %add.4 = add nsw i32 %4, 1
  %arrayidx2.4 = getelementptr inbounds nuw i8, ptr %A, i64 16
  store i32 %add.4, ptr %arrayidx2.4
  %arrayidx.5 = getelementptr inbounds nuw i8, ptr %B, i64 20
  %5 = load i32, ptr %arrayidx.5
  %add.5 = add nsw i32 %5, 1
  %arrayidx2.5 = getelementptr inbounds nuw i8, ptr %A, i64 20
  store i32 %add.5, ptr %arrayidx2.5
  %arrayidx.6 = getelementptr inbounds nuw i8, ptr %B, i64 24
  %6 = load i32, ptr %arrayidx.6
  %add.6 = add nsw i32 %6, 1
  %arrayidx2.6 = getelementptr inbounds nuw i8, ptr %A, i64 24
  store i32 %add.6, ptr %arrayidx2.6
  %arrayidx.7 = getelementptr inbounds nuw i8, ptr %B, i64 28
  %7 = load i32, ptr %arrayidx.7
  %add.7 = add nsw i32 %7, 1
  %arrayidx2.7 = getelementptr inbounds nuw i8, ptr %A, i64 28
  store i32 %add.7, ptr %arrayidx2.7
  ret void
}
```

The following trace is produced; note that the wrong operand is used
for `Idx > 2`:

Before:
```
GetScalarCost(), Idx=0
UniqueValues[Idx]:   %add = add nsw i32 %0, 1
Op1:   %0 = load i32, ptr %B, align 4
GetScalarCost(), Idx=1
UniqueValues[Idx]:   %add.1 = add nsw i32 %1, 1
Op1:   %1 = load i32, ptr %arrayidx.1, align 4
GetScalarCost(), Idx=2
UniqueValues[Idx]:   %add.2 = add nsw i32 %2, 1
Op1:   %2 = load i32, ptr %arrayidx.2, align 4
GetScalarCost(), Idx=3
UniqueValues[Idx]:   %add.4 = add nsw i32 %3, 1
Op1:   %0 = load i32, ptr %B, align 4
GetScalarCost(), Idx=4
UniqueValues[Idx]:   %add.5 = add nsw i32 %4, 1
Op1:   %3 = load i32, ptr %arrayidx.4, align 4
GetScalarCost(), Idx=5
UniqueValues[Idx]:   %add.6 = add nsw i32 %5, 1
Op1:   %4 = load i32, ptr %arrayidx.5, align 4
GetScalarCost(), Idx=6
UniqueValues[Idx]:   %add.7 = add nsw i32 %6, 1
Op1:   %5 = load i32, ptr %arrayidx.6, align 4
```

After:
```
GetScalarCost(), Idx=0
UniqueValues[Idx]:   %add = add nsw i32 %0, 1
Op1:   %0 = load i32, ptr %B, align 4
GetScalarCost(), Idx=1
UniqueValues[Idx]:   %add.1 = add nsw i32 %1, 1
Op1:   %1 = load i32, ptr %arrayidx.1, align 4
GetScalarCost(), Idx=2
UniqueValues[Idx]:   %add.2 = add nsw i32 %2, 1
Op1:   %2 = load i32, ptr %arrayidx.2, align 4
GetScalarCost(), Idx=3
UniqueValues[Idx]:   %add.4 = add nsw i32 %3, 1
Op1:   %3 = load i32, ptr %arrayidx.4, align 4
GetScalarCost(), Idx=4
UniqueValues[Idx]:   %add.5 = add nsw i32 %4, 1
Op1:   %4 = load i32, ptr %arrayidx.5, align 4
GetScalarCost(), Idx=5
UniqueValues[Idx]:   %add.6 = add nsw i32 %5, 1
Op1:   %5 = load i32, ptr %arrayidx.6, align 4
GetScalarCost(), Idx=6
UniqueValues[Idx]:   %add.7 = add nsw i32 %6, 1
Op1:   %6 = load i32, ptr %arrayidx.7, align 4
```
2026-01-05 22:25:25 +00:00

45 lines
1.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-unknown"
; Regression input with a duplicated scalar among four consecutive i32 stores:
; %add is stored to %A at offsets 0 and 8, %add.1 at offset 4, %add.3 at 12.
; Each udiv uses a different constant divisor (3, 8, 2).
; The expected output below vectorizes only the first two lanes as <2 x i32>,
; feeds the duplicate store from lane 0 via extractelement, and keeps the
; udiv by 2 scalar.
; NOTE(review): in the expected output the capture named A binds the pointer
; that is loaded from and A1 the pointer that is stored to, whereas the input
; loads from %B and stores to %A -- confirm the assertions were regenerated
; against the current input names.
define void @foo(ptr noalias %A, ptr noalias %B) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 12
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
; CHECK-NEXT: [[ADD_3:%.*]] = udiv i32 [[TMP2]], 2
; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr i8, ptr [[A1:%.*]], i64 8
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr i8, ptr [[A1]], i64 12
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = udiv <2 x i32> [[TMP1]], <i32 3, i32 8>
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[A1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
; CHECK-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX2_2]], align 4
; CHECK-NEXT: store i32 [[ADD_3]], ptr [[ARRAYIDX2_3]], align 4
; CHECK-NEXT: ret void
;
entry:
; Source addresses in %B; offset 8 is intentionally not read.
%arrayidx.1 = getelementptr i8, ptr %B, i64 4
%arrayidx.3 = getelementptr i8, ptr %B, i64 12
%0 = load i32, ptr %B
%1 = load i32, ptr %arrayidx.1
%3 = load i32, ptr %arrayidx.3
; Three distinct udiv scalars, each with its own loaded operand and divisor.
%add = udiv i32 %0, 3
%add.1 = udiv i32 %1, 8
%add.3 = udiv i32 %3, 2
%arrayidx2.1 = getelementptr i8, ptr %A, i64 4
%arrayidx2.2 = getelementptr i8, ptr %A, i64 8
%arrayidx2.3 = getelementptr i8, ptr %A, i64 12
; Four consecutive stores; the third one repeats %add (the duplicated scalar).
store i32 %add, ptr %A
store i32 %add.1, ptr %arrayidx2.1
store i32 %add, ptr %arrayidx2.2
store i32 %add.3, ptr %arrayidx2.3
ret void
}