[SLP]Do not include copyable data to the same user twice

If the copyable schedule data is created and the user is used several
times in the user node, there is no need to count the same data for the
same user several times; it needs to be included only once.

Fixes #153754
This commit is contained in:
Alexey Bataev 2025-08-15 10:46:56 -07:00
parent 732eb5427c
commit b157599156
2 changed files with 75 additions and 0 deletions

View File

@ -5336,6 +5336,7 @@ private:
ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
const auto *It = find(Op, I);
assert(It != Op.end() && "Lane not set");
SmallPtrSet<Instruction *, 4> Visited;
do {
int Lane = std::distance(Op.begin(), It);
assert(Lane >= 0 && "Lane not set");
@ -5345,6 +5346,10 @@ private:
assert(Lane < static_cast<int>(EI.UserTE->Scalars.size()) &&
"Couldn't find extract lane");
auto *In = cast<Instruction>(EI.UserTE->Scalars[Lane]);
if (!Visited.insert(In).second) {
It = find(make_range(std::next(It), Op.end()), I);
continue;
}
ScheduleCopyableDataMapByInstUser
.try_emplace(std::make_pair(std::make_pair(In, EI.EdgeIdx), I))
.first->getSecond()
@ -20927,6 +20932,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
}
ScheduledBundlesList.pop_back();
SmallVector<ScheduleData *> ControlDependentMembers;
SmallPtrSet<Instruction *, 4> Visited;
for (Value *V : VL) {
if (S.isNonSchedulable(V))
continue;
@ -20944,6 +20950,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
const auto *It = find(Op, I);
assert(It != Op.end() && "Lane not set");
SmallPtrSet<Instruction *, 4> Visited;
do {
int Lane = std::distance(Op.begin(), It);
assert(Lane >= 0 && "Lane not set");
@ -20953,6 +20960,10 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
assert(Lane < static_cast<int>(EI.UserTE->Scalars.size()) &&
"Couldn't find extract lane");
auto *In = cast<Instruction>(EI.UserTE->Scalars[Lane]);
if (!Visited.insert(In).second) {
It = find(make_range(std::next(It), Op.end()), I);
break;
}
ScheduleCopyableDataMapByInstUser
[std::make_pair(std::make_pair(In, EI.EdgeIdx), I)]
.pop_back();

View File

@ -0,0 +1,64 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; Regression test that accompanies the fix for issue #153754 (copyable
; schedule data registered more than once for the same user).
; The autogenerated assertions expect the output of the slp-vectorizer pass
; to contain the same scalar instructions as the input, with no vector code.
; NOTE(review): presumably this input tripped the scheduler before the fix;
; confirm the exact failure mode against the issue report.
define i32 @test(ptr %o, i32 %b.021.i) {
; CHECK-LABEL: define i32 @test(
; CHECK-SAME: ptr [[O:%.*]], i32 [[B_021_I:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[O1:%.*]] = alloca [3 x i32], align 4
; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
; CHECK: [[WHILE_BODY]]:
; CHECK-NEXT: [[SUB623:%.*]] = phi i32 [ [[SUB6:%.*]], %[[N_EXIT:.*]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[ADD21:%.*]] = phi i32 [ [[ADD:%.*]], %[[N_EXIT]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[ADD419:%.*]] = phi i32 [ [[ADD4:%.*]], %[[N_EXIT]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[ADD18:%.*]] = phi i32 [ [[ADD]], %[[N_EXIT]] ], [ 1, %[[ENTRY]] ]
; CHECK-NEXT: store i32 [[ADD419]], ptr [[O1]], align 4
; CHECK-NEXT: store i32 [[ADD18]], ptr [[O]], align 4
; CHECK-NEXT: br label %[[FOR_BODY4_I:.*]]
; CHECK: [[FOR_COND1_I:.*]]:
; CHECK-NEXT: ret i32 0
; CHECK: [[FOR_BODY4_I]]:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[O1]], align 4
; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[FOR_COND1_I]], label %[[N_EXIT]]
; CHECK: [[N_EXIT]]:
; CHECK-NEXT: [[SUB:%.*]] = or i32 [[B_021_I]], [[ADD21]]
; CHECK-NEXT: [[ADD]] = or i32 [[SUB]], 1
; CHECK-NEXT: [[ADD2:%.*]] = or i32 [[B_021_I]], 1
; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[SUB623]]
; CHECK-NEXT: [[ADD4]] = or i32 [[ADD3]], 1
; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[B_021_I]], 1
; CHECK-NEXT: [[SUB6]] = or i32 [[MUL]], 1
; CHECK-NEXT: br label %[[WHILE_BODY]]
;
entry:
%o1 = alloca [3 x i32], align 4
br label %while.body
; Loop header: four phis carry values computed in n.exit back around the loop.
; Two of them (%add21 and %add18) take the same incoming value %add from
; n.exit and differ only in their initial value from entry (0 vs 1).
while.body: ; preds = %n.exit, %entry
%sub623 = phi i32 [ %sub6, %n.exit ], [ 0, %entry ]
%add21 = phi i32 [ %add, %n.exit ], [ 0, %entry ]
%add419 = phi i32 [ %add4, %n.exit ], [ 0, %entry ]
%add18 = phi i32 [ %add, %n.exit ], [ 1, %entry ]
; Two adjacent scalar stores of phi values, to different base pointers.
store i32 %add419, ptr %o1, align 4
store i32 %add18, ptr %o, align 4
br label %for.body4.i
for.cond1.i: ; preds = %for.body4.i
ret i32 0
; Reloads the value just stored to %o1 and leaves the loop when it is zero.
for.body4.i: ; preds = %for.body4.i
%0 = load i32, ptr %o1, align 4
%tobool.not.i = icmp eq i32 %0, 0
br i1 %tobool.not.i, label %for.cond1.i, label %n.exit
; Computes the next-iteration values. %add, %add4 and %sub6 all have the form
; "or i32 <x>, 1", while their first operands come from different opcodes
; (or, add, shl).
; NOTE(review): presumably the reuse of %add by two phis is the "user used
; several times in the user node" case named in the commit message; verify.
n.exit: ; preds = %for.body4.i
%sub = or i32 %b.021.i, %add21
%add = or i32 %sub, 1
%add2 = or i32 %b.021.i, 1
%add3 = add i32 %add2, %sub623
%add4 = or i32 %add3, 1
%mul = shl i32 %b.021.i, 1
%sub6 = or i32 %mul, 1
br label %while.body
}