[SLP] Fix PR108709: postpone buildvector clustered nodes, if required
The "clustered" nodes for buildvector nodes must be postponed in accordance with the global flag; otherwise, it may cause a crash because of the dependency between phi nodes.
This commit is contained in:
parent
69f3244da7
commit
18ef467d73
@ -2883,7 +2883,8 @@ private:
|
||||
/// Create a new vector from a list of scalar values. Produces a sequence
|
||||
/// which exploits values reused across lanes, and arranges the inserts
|
||||
/// for ease of later optimization.
|
||||
Value *createBuildVector(const TreeEntry *E, Type *ScalarTy);
|
||||
Value *createBuildVector(const TreeEntry *E, Type *ScalarTy,
|
||||
bool PostponedPHIs);
|
||||
|
||||
/// Returns the instruction in the bundle, which can be used as a base point
|
||||
/// for scheduling. Usually it is the last instruction in the bundle, except
|
||||
@ -13198,9 +13199,10 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
|
||||
return Res;
|
||||
}
|
||||
|
||||
Value *BoUpSLP::createBuildVector(const TreeEntry *E, Type *ScalarTy) {
|
||||
Value *BoUpSLP::createBuildVector(const TreeEntry *E, Type *ScalarTy,
|
||||
bool PostponedPHIs) {
|
||||
for (auto [EIdx, _] : E->CombinedEntriesWithIndices)
|
||||
(void)vectorizeTree(VectorizableTree[EIdx].get(), /*PostponedPHIs=*/false);
|
||||
(void)vectorizeTree(VectorizableTree[EIdx].get(), PostponedPHIs);
|
||||
return processBuildVector<ShuffleInstructionBuilder, Value *>(E, ScalarTy,
|
||||
Builder, *this);
|
||||
}
|
||||
@ -13231,7 +13233,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
|
||||
// Set insert point for non-reduction initial nodes.
|
||||
if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList)
|
||||
setInsertPointAfterBundle(E);
|
||||
Value *Vec = createBuildVector(E, ScalarTy);
|
||||
Value *Vec = createBuildVector(E, ScalarTy, PostponedPHIs);
|
||||
E->VectorizedValue = Vec;
|
||||
return Vec;
|
||||
}
|
||||
|
@ -0,0 +1,46 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
define void @test() {
|
||||
; CHECK-LABEL: define void @test() {
|
||||
; CHECK-NEXT: [[BB:.*]]:
|
||||
; CHECK-NEXT: br label %[[BB6:.*]]
|
||||
; CHECK: [[BB1:.*]]:
|
||||
; CHECK-NEXT: br label %[[BB2:.*]]
|
||||
; CHECK: [[BB2]]:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP5:%.*]], %[[BB6]] ]
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: [[BB6]]:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <2 x i32> [[TMP1]], i64 2)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <2 x i32> <i32 2, i32 poison>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]]
|
||||
; CHECK-NEXT: br i1 false, label %[[BB2]], label %[[BB6]]
|
||||
;
|
||||
bb:
|
||||
br label %bb6
|
||||
|
||||
bb1:
|
||||
%ashr = ashr i32 0, 0
|
||||
br label %bb2
|
||||
|
||||
bb2:
|
||||
%phi = phi i32 [ %ashr, %bb1 ], [ %ashr9, %bb6 ]
|
||||
%phi3 = phi i32 [ 0, %bb1 ], [ %mul10, %bb6 ]
|
||||
%phi4 = phi i32 [ 0, %bb1 ], [ %mul11, %bb6 ]
|
||||
%phi5 = phi i32 [ 0, %bb1 ], [ %mul, %bb6 ]
|
||||
ret void
|
||||
|
||||
bb6:
|
||||
%phi7 = phi i32 [ 0, %bb ], [ %mul11, %bb6 ]
|
||||
%phi8 = phi i32 [ 0, %bb ], [ %mul10, %bb6 ]
|
||||
%mul = mul i32 0, %phi8
|
||||
%ashr9 = ashr i32 0, 0
|
||||
%mul10 = mul i32 0, 0
|
||||
%mul11 = mul i32 %phi7, 0
|
||||
br i1 false, label %bb2, label %bb6
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user