llvm-project/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll
Alexey Bataev 7152bf3bc8 [SLP]Do not create new vector node if scalars fully overlap with the existing one
If the list of scalars is already vectorized as part of the same vector node,
there is no need to generate the vector node again; it will be handled as part
of overlapping matching.

Fixes #113810
2024-10-28 06:59:41 -07:00

94 lines
4.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
; Regression test for the SLP vectorizer (issue #113810).
; The input builds the same "icmp ptr with null / load i32 / select" scalar
; pattern over addresses derived from %0 in two different blocks: over all four
; addresses (%0+8, +12, +16, +20) in L41, and over only the last two (%0+16,
; %0+20) in L47. The expected output below vectorizes L41 as <4 x ...>, and in
; L47 takes lanes 2 and 3 from the existing 4-wide GEP vector via a
; shufflevector to form a <2 x ...> icmp/load/select.
define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[P1:%.*]], ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i1 [[C1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT: [[TOP:.*:]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <4 x ptr> [[TMP4]], <4 x i64> <i64 8, i64 12, i64 16, i64 20>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 2
; CHECK-NEXT: br i1 [[C1]], label %[[L42:.*]], label %[[L41:.*]]
; CHECK: [[L41]]:
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x ptr> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> zeroinitializer, <4 x i32> [[TMP8]]
; CHECK-NEXT: br label %[[L112:.*]]
; CHECK: [[L42]]:
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[DOTNOT280:%.*]] = icmp eq i32 [[TMP10]], 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP1]], i32 2
; CHECK-NEXT: br i1 [[DOTNOT280]], label %[[L112]], label %[[L47:.*]]
; CHECK: [[L47]]:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x ptr> [[TMP14]], zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> zeroinitializer, <2 x i32> [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP13]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP18]], <2 x i32> [[TMP17]], i64 2)
; CHECK-NEXT: br label %[[L112]]
; CHECK: [[L112]]:
; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP19]], %[[L47]] ], [ [[TMP9]], %[[L41]] ], [ [[TMP11]], %[[L42]] ]
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0
; CHECK-NEXT: store i32 [[TMP21]], ptr [[P2]], align 4
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1
; CHECK-NEXT: store i32 [[TMP22]], ptr [[P1]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2
; CHECK-NEXT: store i32 [[TMP23]], ptr [[P2]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3
; CHECK-NEXT: store i32 [[TMP24]], ptr [[P1]], align 4
; CHECK-NEXT: ret void
;
top:
; Four addresses at byte offsets 8, 12, 16 and 20 from %0 (4 bytes apart,
; each later read as an i32).
%2 = getelementptr i8, ptr %0, i64 8
%3 = getelementptr i8, ptr %0, i64 12
%4 = getelementptr i8, ptr %0, i64 16
%5 = getelementptr i8, ptr %0, i64 20
; %c1 true goes to L42, false goes to L41.
br i1 %c1, label %L42, label %L41
L41:
; For each of the four addresses: compare the address with null, load an i32
; from it, and select 0 if the address is null, otherwise the loaded value.
%.not276 = icmp eq ptr %2, null
%6 = load i32, ptr %2, align 4
%7 = select i1 %.not276, i32 0, i32 %6
%.not277 = icmp eq ptr %3, null
%8 = load i32, ptr %3, align 4
%9 = select i1 %.not277, i32 0, i32 %8
%.not278 = icmp eq ptr %4, null
%10 = load i32, ptr %4, align 4
%11 = select i1 %.not278, i32 0, i32 %10
%.not279 = icmp eq ptr %5, null
%12 = load i32, ptr %5, align 4
%13 = select i1 %.not279, i32 0, i32 %12
br label %L112
L42:
; Only the first address (%2) is read here. A loaded value of zero jumps
; straight to L112; any other value continues in L47.
%14 = load i32, ptr %2, align 4
%.not280 = icmp eq i32 %14, 0
br i1 %.not280, label %L112, label %L47
L47:
; %3 is loaded directly, with no null compare or select. %4 and %5 repeat the
; icmp/load/select pattern already used for the same addresses in L41.
%15 = load i32, ptr %3, align 4
%.not282 = icmp eq ptr %4, null
%16 = load i32, ptr %4, align 4
%17 = select i1 %.not282, i32 0, i32 %16
%.not283 = icmp eq ptr %5, null
%18 = load i32, ptr %5, align 4
%19 = select i1 %.not283, i32 0, i32 %18
br label %L112
L112:
; One phi per value, merging what arrives from L47, L41 and L42. On the L42
; edge every phi receives constant 0 except %value_phi12335, which receives
; the function argument %1.
%value_phi13336 = phi i32 [ %19, %L47 ], [ %13, %L41 ], [ 0, %L42 ]
%value_phi12335 = phi i32 [ %17, %L47 ], [ %11, %L41 ], [ %1, %L42 ]
%value_phi11334 = phi i32 [ %15, %L47 ], [ %9, %L41 ], [ 0, %L42 ]
%value_phi10333 = phi i32 [ 0, %L47 ], [ %7, %L41 ], [ 0, %L42 ]
; The four merged values are stored alternately through %p2 and %p1; the
; expected output keeps these as scalar stores fed by extractelement.
store i32 %value_phi10333, ptr %p2, align 4
store i32 %value_phi11334, ptr %p1, align 4
store i32 %value_phi12335, ptr %p2, align 4
store i32 %value_phi13336, ptr %p1, align 4
ret void
}