diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 45c24bee6516..b88de09a3e44 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5336,6 +5336,7 @@ private: ArrayRef Op = EI.UserTE->getOperand(EI.EdgeIdx); const auto *It = find(Op, I); assert(It != Op.end() && "Lane not set"); + SmallPtrSet Visited; do { int Lane = std::distance(Op.begin(), It); assert(Lane >= 0 && "Lane not set"); @@ -5345,6 +5346,10 @@ private: assert(Lane < static_cast(EI.UserTE->Scalars.size()) && "Couldn't find extract lane"); auto *In = cast(EI.UserTE->Scalars[Lane]); + if (!Visited.insert(In).second) { + It = find(make_range(std::next(It), Op.end()), I); + continue; + } ScheduleCopyableDataMapByInstUser .try_emplace(std::make_pair(std::make_pair(In, EI.EdgeIdx), I)) .first->getSecond() @@ -20927,6 +20932,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, } ScheduledBundlesList.pop_back(); SmallVector ControlDependentMembers; + SmallPtrSet Visited; for (Value *V : VL) { if (S.isNonSchedulable(V)) continue; @@ -20944,6 +20950,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, ArrayRef Op = EI.UserTE->getOperand(EI.EdgeIdx); const auto *It = find(Op, I); assert(It != Op.end() && "Lane not set"); + SmallPtrSet Visited; do { int Lane = std::distance(Op.begin(), It); assert(Lane >= 0 && "Lane not set"); @@ -20953,6 +20960,10 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, assert(Lane < static_cast(EI.UserTE->Scalars.size()) && "Couldn't find extract lane"); auto *In = cast(EI.UserTE->Scalars[Lane]); + if (!Visited.insert(In).second) { + It = find(make_range(std::next(It), Op.end()), I); + break; + } ScheduleCopyableDataMapByInstUser [std::make_pair(std::make_pair(In, EI.EdgeIdx), I)] .pop_back(); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule-same-user-with-copyable.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule-same-user-with-copyable.ll new file mode 100644 index 000000000000..c53ccf425a31 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule-same-user-with-copyable.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @test(ptr %o, i32 %b.021.i) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ptr [[O:%.*]], i32 [[B_021_I:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[O1:%.*]] = alloca [3 x i32], align 4 +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[SUB623:%.*]] = phi i32 [ [[SUB6:%.*]], %[[N_EXIT:.*]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[ADD21:%.*]] = phi i32 [ [[ADD:%.*]], %[[N_EXIT]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[ADD419:%.*]] = phi i32 [ [[ADD4:%.*]], %[[N_EXIT]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[ADD18:%.*]] = phi i32 [ [[ADD]], %[[N_EXIT]] ], [ 1, %[[ENTRY]] ] +; CHECK-NEXT: store i32 [[ADD419]], ptr [[O1]], align 4 +; CHECK-NEXT: store i32 [[ADD18]], ptr [[O]], align 4 +; CHECK-NEXT: br label %[[FOR_BODY4_I:.*]] +; CHECK: [[FOR_COND1_I:.*]]: +; CHECK-NEXT: ret i32 0 +; CHECK: [[FOR_BODY4_I]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[O1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[FOR_COND1_I]], label %[[N_EXIT]] +; CHECK: [[N_EXIT]]: +; CHECK-NEXT: [[SUB:%.*]] = or i32 [[B_021_I]], [[ADD21]] +; CHECK-NEXT: [[ADD]] = or i32 [[SUB]], 1 +; CHECK-NEXT: [[ADD2:%.*]] = or i32 [[B_021_I]], 1 +; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[SUB623]] +; CHECK-NEXT: [[ADD4]] = or i32 [[ADD3]], 1 +; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[B_021_I]], 1 +; CHECK-NEXT: [[SUB6]] = or i32 [[MUL]], 1 +; CHECK-NEXT: br label %[[WHILE_BODY]] +; +entry: + %o1 = alloca [3 x i32], align 4 + br label %while.body + +while.body: ; preds = %n.exit, %entry + %sub623 = phi i32 [ %sub6, %n.exit ], [ 0, %entry ] + %add21 = phi i32 [ %add, %n.exit ], [ 0, %entry ] + %add419 = phi i32 [ %add4, %n.exit ], [ 0, %entry ] + %add18 = phi i32 [ %add, %n.exit ], [ 1, %entry ] + store i32 %add419, ptr %o1, align 4 + store i32 %add18, ptr %o, align 4 + br label %for.body4.i + +for.cond1.i: ; preds = %for.body4.i + ret i32 0 + +for.body4.i: ; preds = %while.body + %0 = load i32, ptr %o1, align 4 + %tobool.not.i = icmp eq i32 %0, 0 + br i1 %tobool.not.i, label %for.cond1.i, label %n.exit + +n.exit: ; preds = %for.body4.i + %sub = or i32 %b.021.i, %add21 + %add = or i32 %sub, 1 + %add2 = or i32 %b.021.i, 1 + %add3 = add i32 %add2, %sub623 + %add4 = or i32 %add3, 1 + %mul = shl i32 %b.021.i, 1 + %sub6 = or i32 %mul, 1 + br label %while.body +}