
`ad9909d "[SLP]Fix perfect diamond match with extractelements in scalars"` changed SLPVectorizer getScalarizationOverhead() to call TTI.getVectorInstrCost() instead of TTI.getScalarizationOverhead() in some cases. This was due to X86-specific handling in these (overridden) methods, and unfortunately the general preference for TTI.getScalarizationOverhead() was dropped. If VL is available, getScalarizationOverhead() should always be preferred, and this is indeed the case for SystemZ, which has a special insertion instruction that can insert two GPR64s. Then `33af951 "[SLP]Synchronize cost of gather/buildvector nodes with codegen"` reworked SLPVectorizer getGatherCost(), which together with ad9909d caused the SystemZ test vec-elt-insertion.ll to fail. This patch restores the SystemZ test and reverts the change in SLPVectorizer getScalarizationOverhead() so that TTI.getScalarizationOverhead() is always called again. The ForPoisonSrc argument is now passed on to the TTI method so that X86 can handle this as required. Fixes: #135346
101 lines
4.7 KiB
LLVM
101 lines
4.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx < %s \
; RUN: | FileCheck %s
; REQUIRES: x86-registered-target

; @test: four i32 values are merged by phis in %L112 and then stored
; alternately through %p2 and %p1.
;   %p1, %p2 - store destinations used in %L112.
;   %0       - base pointer; all loads use byte offsets 8, 12, 16 and 20 from it.
;   %1       - i32 fed into %value_phi12335 on the edge from %L42.
;   %c1      - entry branch condition: true goes to %L42, false to %L41.
; The autogenerated assertions below mirror the input IR instruction for
; instruction, i.e. the expected result is that the function stays scalar.
; NOTE(review): presumably this guards the gather/buildvector cost path on
; X86 - confirm against the commit that introduced the test.
define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[P1:%.*]], ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i1 [[C1:%.*]], ptr [[P2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[TOP:.*:]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 12
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i64 20
; CHECK-NEXT:    br i1 [[C1]], label %[[L42:.*]], label %[[L41:.*]]
; CHECK:       [[L41]]:
; CHECK-NEXT:    [[DOTNOT276:%.*]] = icmp eq ptr [[TMP2]], null
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[DOTNOT276]], i32 0, i32 [[TMP6]]
; CHECK-NEXT:    [[DOTNOT277:%.*]] = icmp eq ptr [[TMP3]], null
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTNOT277]], i32 0, i32 [[TMP8]]
; CHECK-NEXT:    [[DOTNOT278:%.*]] = icmp eq ptr [[TMP4]], null
; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[DOTNOT278]], i32 0, i32 [[TMP10]]
; CHECK-NEXT:    [[DOTNOT279:%.*]] = icmp eq ptr [[TMP5]], null
; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = select i1 [[DOTNOT279]], i32 0, i32 [[TMP12]]
; CHECK-NEXT:    br label %[[L112:.*]]
; CHECK:       [[L42]]:
; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[DOTNOT280:%.*]] = icmp eq i32 [[TMP14]], 0
; CHECK-NEXT:    br i1 [[DOTNOT280]], label %[[L112]], label %[[L47:.*]]
; CHECK:       [[L47]]:
; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT:    [[DOTNOT282:%.*]] = icmp eq ptr [[TMP4]], null
; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[DOTNOT282]], i32 0, i32 [[TMP16]]
; CHECK-NEXT:    [[DOTNOT283:%.*]] = icmp eq ptr [[TMP5]], null
; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT:    [[TMP19:%.*]] = select i1 [[DOTNOT283]], i32 0, i32 [[TMP18]]
; CHECK-NEXT:    br label %[[L112]]
; CHECK:       [[L112]]:
; CHECK-NEXT:    [[VALUE_PHI13336:%.*]] = phi i32 [ [[TMP19]], %[[L47]] ], [ [[TMP13]], %[[L41]] ], [ 0, %[[L42]] ]
; CHECK-NEXT:    [[VALUE_PHI12335:%.*]] = phi i32 [ [[TMP17]], %[[L47]] ], [ [[TMP11]], %[[L41]] ], [ [[TMP1]], %[[L42]] ]
; CHECK-NEXT:    [[VALUE_PHI11334:%.*]] = phi i32 [ [[TMP15]], %[[L47]] ], [ [[TMP9]], %[[L41]] ], [ 0, %[[L42]] ]
; CHECK-NEXT:    [[VALUE_PHI10333:%.*]] = phi i32 [ 0, %[[L47]] ], [ [[TMP7]], %[[L41]] ], [ 0, %[[L42]] ]
; CHECK-NEXT:    store i32 [[VALUE_PHI10333]], ptr [[P2]], align 4
; CHECK-NEXT:    store i32 [[VALUE_PHI11334]], ptr [[P1]], align 4
; CHECK-NEXT:    store i32 [[VALUE_PHI12335]], ptr [[P2]], align 4
; CHECK-NEXT:    store i32 [[VALUE_PHI13336]], ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
top:
  ; Addresses of four consecutive i32 slots at byte offsets 8..20 from %0.
  %2 = getelementptr i8, ptr %0, i64 8
  %3 = getelementptr i8, ptr %0, i64 12
  %4 = getelementptr i8, ptr %0, i64 16
  %5 = getelementptr i8, ptr %0, i64 20
  br i1 %c1, label %L42, label %L41

L41:
  ; All four slots are loaded unconditionally; each select yields 0 instead of
  ; the loaded value when the slot address compares equal to null.
  %.not276 = icmp eq ptr %2, null
  %6 = load i32, ptr %2, align 4
  %7 = select i1 %.not276, i32 0, i32 %6
  %.not277 = icmp eq ptr %3, null
  %8 = load i32, ptr %3, align 4
  %9 = select i1 %.not277, i32 0, i32 %8
  %.not278 = icmp eq ptr %4, null
  %10 = load i32, ptr %4, align 4
  %11 = select i1 %.not278, i32 0, i32 %10
  %.not279 = icmp eq ptr %5, null
  %12 = load i32, ptr %5, align 4
  %13 = select i1 %.not279, i32 0, i32 %12
  br label %L112

L42:
  ; Only the slot at offset 8 is inspected here: a zero value jumps straight
  ; to the merge block, anything else continues in %L47.
  %14 = load i32, ptr %2, align 4
  %.not280 = icmp eq i32 %14, 0
  br i1 %.not280, label %L112, label %L47

L47:
  ; The offset-12 slot is used as loaded; the offset-16 and offset-20 slots go
  ; through the same null-compare/select pattern as in %L41.
  %15 = load i32, ptr %3, align 4
  %.not282 = icmp eq ptr %4, null
  %16 = load i32, ptr %4, align 4
  %17 = select i1 %.not282, i32 0, i32 %16
  %.not283 = icmp eq ptr %5, null
  %18 = load i32, ptr %5, align 4
  %19 = select i1 %.not283, i32 0, i32 %18
  br label %L112

L112:
  ; Merge the per-slot values from the three predecessors. The phis mix
  ; instruction results, the constant 0 and the argument %1.
  %value_phi13336 = phi i32 [ %19, %L47 ], [ %13, %L41 ], [ 0, %L42 ]
  %value_phi12335 = phi i32 [ %17, %L47 ], [ %11, %L41 ], [ %1, %L42 ]
  %value_phi11334 = phi i32 [ %15, %L47 ], [ %9, %L41 ], [ 0, %L42 ]
  %value_phi10333 = phi i32 [ 0, %L47 ], [ %7, %L41 ], [ 0, %L42 ]
  ; The four stores alternate between %p2 and %p1, so they do not address
  ; four distinct consecutive locations.
  store i32 %value_phi10333, ptr %p2, align 4
  store i32 %value_phi11334, ptr %p1, align 4
  store i32 %value_phi12335, ptr %p2, align 4
  store i32 %value_phi13336, ptr %p1, align 4
  ret void
}