llvm-project/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll
Jonas Paulsson f5c8c1eedb
[SLPVectorizer] Move X86 specific handling into X86TTIImpl. (#137830)
`ad9909d "[SLP]Fix perfect diamond match with extractelements in scalars"`
changed SLPVectorizer getScalarizationOverhead() to call
TTI.getVectorInstrCost() instead of TTI.getScalarizationOverhead() in some
cases. This was due to X86 specific handling in these (overridden) methods,
and unfortunately the general preference of TTI.getScalarizationOverhead()
was dropped. If VL is available it should always be preferred to use
getScalarizationOverhead(), and this is indeed the case for SystemZ which
has a special insertion instruction that can insert two GPR64s.

Then `33af951 "[SLP]Synchronize cost of gather/buildvector nodes with
codegen"` reworked SLPVectorizer getGatherCost(), which together with
ad9909d caused the SystemZ test vec-elt-insertion.ll to fail.

This patch restores the SystemZ test and reverts the change in SLPVectorizer
getScalarizationOverhead() so that TTI.getScalarizationOverhead() is always
called again. The ForPoisonSrc argument is now passed on to the TTI method
so that X86 can handle this as required.

Fixes: #135346
2025-04-30 17:11:27 +02:00

101 lines
4.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx < %s \
; RUN: | FileCheck %s
; REQUIRES: x86-registered-target
; @test loads i32 values from %0 at byte offsets 8, 12, 16 and 20 along three
; different control-flow paths, merges them through four phis in %L112 and
; stores the merged values alternately to %p2 and %p1.
; The autogenerated assertions that follow the signature list the same scalar
; instruction sequence as the input IR below, i.e. the expected output of the
; RUN line contains no vector instructions for this function.
define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[P1:%.*]], ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i1 [[C1:%.*]], ptr [[P2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TOP:.*:]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 12
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i64 20
; CHECK-NEXT: br i1 [[C1]], label %[[L42:.*]], label %[[L41:.*]]
; CHECK: [[L41]]:
; CHECK-NEXT: [[DOTNOT276:%.*]] = icmp eq ptr [[TMP2]], null
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[DOTNOT276]], i32 0, i32 [[TMP6]]
; CHECK-NEXT: [[DOTNOT277:%.*]] = icmp eq ptr [[TMP3]], null
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT277]], i32 0, i32 [[TMP8]]
; CHECK-NEXT: [[DOTNOT278:%.*]] = icmp eq ptr [[TMP4]], null
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[DOTNOT278]], i32 0, i32 [[TMP10]]
; CHECK-NEXT: [[DOTNOT279:%.*]] = icmp eq ptr [[TMP5]], null
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[DOTNOT279]], i32 0, i32 [[TMP12]]
; CHECK-NEXT: br label %[[L112:.*]]
; CHECK: [[L42]]:
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[DOTNOT280:%.*]] = icmp eq i32 [[TMP14]], 0
; CHECK-NEXT: br i1 [[DOTNOT280]], label %[[L112]], label %[[L47:.*]]
; CHECK: [[L47]]:
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[DOTNOT282:%.*]] = icmp eq ptr [[TMP4]], null
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[DOTNOT282]], i32 0, i32 [[TMP16]]
; CHECK-NEXT: [[DOTNOT283:%.*]] = icmp eq ptr [[TMP5]], null
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[DOTNOT283]], i32 0, i32 [[TMP18]]
; CHECK-NEXT: br label %[[L112]]
; CHECK: [[L112]]:
; CHECK-NEXT: [[VALUE_PHI13336:%.*]] = phi i32 [ [[TMP19]], %[[L47]] ], [ [[TMP13]], %[[L41]] ], [ 0, %[[L42]] ]
; CHECK-NEXT: [[VALUE_PHI12335:%.*]] = phi i32 [ [[TMP17]], %[[L47]] ], [ [[TMP11]], %[[L41]] ], [ [[TMP1]], %[[L42]] ]
; CHECK-NEXT: [[VALUE_PHI11334:%.*]] = phi i32 [ [[TMP15]], %[[L47]] ], [ [[TMP9]], %[[L41]] ], [ 0, %[[L42]] ]
; CHECK-NEXT: [[VALUE_PHI10333:%.*]] = phi i32 [ 0, %[[L47]] ], [ [[TMP7]], %[[L41]] ], [ 0, %[[L42]] ]
; CHECK-NEXT: store i32 [[VALUE_PHI10333]], ptr [[P2]], align 4
; CHECK-NEXT: store i32 [[VALUE_PHI11334]], ptr [[P1]], align 4
; CHECK-NEXT: store i32 [[VALUE_PHI12335]], ptr [[P2]], align 4
; CHECK-NEXT: store i32 [[VALUE_PHI13336]], ptr [[P1]], align 4
; CHECK-NEXT: ret void
;
top:
; Addresses of four adjacent i32 slots at %0+8, %0+12, %0+16 and %0+20.
; %c1 selects between the two entry paths: true goes to L42, false to L41.
%2 = getelementptr i8, ptr %0, i64 8
%3 = getelementptr i8, ptr %0, i64 12
%4 = getelementptr i8, ptr %0, i64 16
%5 = getelementptr i8, ptr %0, i64 20
br i1 %c1, label %L42, label %L41
L41:
; %c1 == false: every one of the four slots is loaded, and each loaded value
; is replaced by 0 when its address compares equal to null.
%.not276 = icmp eq ptr %2, null
%6 = load i32, ptr %2, align 4
%7 = select i1 %.not276, i32 0, i32 %6
%.not277 = icmp eq ptr %3, null
%8 = load i32, ptr %3, align 4
%9 = select i1 %.not277, i32 0, i32 %8
%.not278 = icmp eq ptr %4, null
%10 = load i32, ptr %4, align 4
%11 = select i1 %.not278, i32 0, i32 %10
%.not279 = icmp eq ptr %5, null
%12 = load i32, ptr %5, align 4
%13 = select i1 %.not279, i32 0, i32 %12
br label %L112
L42:
; %c1 == true: only the first slot is loaded here. A zero value jumps straight
; to the merge block; any other value continues in L47.
%14 = load i32, ptr %2, align 4
%.not280 = icmp eq i32 %14, 0
br i1 %.not280, label %L112, label %L47
L47:
; Loads the remaining three slots. The second slot (%15) is used as loaded;
; the third and fourth go through the same null-compare/select pattern as L41.
%15 = load i32, ptr %3, align 4
%.not282 = icmp eq ptr %4, null
%16 = load i32, ptr %4, align 4
%17 = select i1 %.not282, i32 0, i32 %16
%.not283 = icmp eq ptr %5, null
%18 = load i32, ptr %5, align 4
%19 = select i1 %.not283, i32 0, i32 %18
br label %L112
L112:
; Merge point for the three predecessors. Incoming values per phi, in the
; order value_phi10333, 11334, 12335, 13336:
;   from %L47: 0,  %15, %17, %19
;   from %L41: %7, %9,  %11, %13
;   from %L42: 0,  0,   %1,  0
%value_phi13336 = phi i32 [ %19, %L47 ], [ %13, %L41 ], [ 0, %L42 ]
%value_phi12335 = phi i32 [ %17, %L47 ], [ %11, %L41 ], [ %1, %L42 ]
%value_phi11334 = phi i32 [ %15, %L47 ], [ %9, %L41 ], [ 0, %L42 ]
%value_phi10333 = phi i32 [ 0, %L47 ], [ %7, %L41 ], [ 0, %L42 ]
; The four merged values are stored alternately to %p2 and %p1, so each
; pointer is written twice.
store i32 %value_phi10333, ptr %p2, align 4
store i32 %value_phi11334, ptr %p1, align 4
store i32 %value_phi12335, ptr %p2, align 4
store i32 %value_phi13336, ptr %p1, align 4
ret void
}