After https://github.com/llvm/llvm-project/pull/153643, there may be a BranchOnCond with a constant condition in the entry block. Simplify those in removeBranchOnConst. This removes a number of redundant conditional branches from entry blocks. In some cases, it may also make the original scalar loop unreachable, because we know it will never execute. In that case, we need to remove the loop from LoopInfo, because all unreachable blocks may dominate each other, making LoopInfo invalid. In those cases, we can also completely remove the loop, for which I'll share a follow-up patch. Depends on https://github.com/llvm/llvm-project/pull/153643. PR: https://github.com/llvm/llvm-project/pull/154510
258 lines
13 KiB
LLVM
258 lines
13 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 5
|
|
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -lv-strided-pointer-ivs=true -S %s | FileCheck %s
|
|
|
|
; Scalar loop that runs 1024 iterations and stores 0.0 to
; %dst[zext(%add * trunc(%phi))], where the i32 multiplier %add = %arg + 1 is
; not constrained in sign by anything in this function.
; The autogenerated assertions below expect the entry block to branch to a
; vector.scevcheck block that guards entry into the vector loop.
define void @step_direction_unknown(i32 %arg, ptr %dst) {
|
|
; CHECK-LABEL: define void @step_direction_unknown(
|
|
; CHECK-SAME: i32 [[ARG:%.*]], ptr [[DST:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[ARG]], 1
|
|
; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
|
|
; CHECK: [[VECTOR_SCEVCHECK]]:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[ARG]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[ADD]], 0
|
|
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 [[ADD]]
|
|
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP2]], i32 1023)
|
|
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
|
|
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
|
|
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i1 [[TMP4]], i1 false
|
|
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
|
|
; CHECK-NEXT: br i1 [[TMP6]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[ADD]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i32> [[TMP8]] to <4 x i64>
|
|
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP9]], i32 0
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP10]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP9]], i32 1
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP12]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP9]], i32 2
|
|
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP14]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP9]], i32 3
|
|
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP16]]
|
|
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP11]], align 8
|
|
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP13]], align 8
|
|
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP15]], align 8
|
|
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP17]], align 8
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
|
|
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
|
|
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: br [[EXIT:label %.*]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
;
|
|
entry:
|
|
; Per-iteration multiplier for the store index; %arg is an unconstrained input.
%add = add i32 %arg, 1
|
|
br label %loop
|
|
|
|
loop:
|
|
%phi = phi i64 [ 0, %entry ], [ %add2, %loop ]
|
|
; The index is formed in i32 (%add * trunc(%phi)) and only then zero-extended
; to i64 for the address computation.
%trunc = trunc i64 %phi to i32
|
|
%mul = mul i32 %add, %trunc
|
|
%zext = zext i32 %mul to i64
|
|
%getelementptr = getelementptr double, ptr %dst, i64 %zext
|
|
store double 0.000000e+00, ptr %getelementptr, align 8
|
|
%add2 = add i64 %phi, 1
|
|
; %phi starts at 0 and the loop exits once %add2 reaches 1024, so the body
; executes 1024 times.
%icmp = icmp eq i64 %add2, 1024
|
|
br i1 %icmp, label %exit, label %loop
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Loop with an i32 induction %iv (starting at 30) and a pointer induction %p.0
; (starting at %start and advancing by %mul = %x << 1 bytes per iteration).
; Each iteration stores the current %p.0 to %call[%iv].
; The autogenerated assertions below expect the entry block to branch straight
; to the vector preheader, and the middle block to branch unconditionally to
; the scalar preheader.
define void @integer_induction_wraps_scev_predicate_known(i32 %x, ptr %call, ptr %start) {
|
|
; CHECK-LABEL: define void @integer_induction_wraps_scev_predicate_known(
|
|
; CHECK-SAME: i32 [[X:%.*]], ptr [[CALL:%.*]], ptr [[START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[X]], 1
|
|
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[MUL]] to i64
|
|
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 992, [[TMP0]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[TMP3]]
|
|
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 30, [[DOTCAST]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[OFFSET_IDX]]
|
|
; CHECK-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP4]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP0]], 4
|
|
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP5]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
|
|
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
;
|
|
entry:
|
|
; Byte stride of the pointer induction: %x * 2.
%mul = shl i32 %x, 1
|
|
br label %for.cond
|
|
|
|
for.cond: ; preds = %for.cond, %entry
|
|
%iv = phi i32 [ 30, %entry ], [ %inc, %for.cond ]
|
|
%p.0 = phi ptr [ %start, %entry ], [ %add.ptr, %for.cond ]
|
|
%add.ptr = getelementptr i8, ptr %p.0, i32 %mul
|
|
%arrayidx = getelementptr ptr, ptr %call, i32 %iv
|
|
store ptr %p.0, ptr %arrayidx, align 4
|
|
%inc = add i32 %iv, 1
|
|
; The exit test compares %iv (not %inc), so the last executed iteration has
; %iv == 1024: the body runs for %iv = 30 .. 1024, i.e. 995 iterations.
%tobool.not = icmp eq i32 %iv, 1024
|
|
br i1 %tobool.not, label %for.end, label %for.cond
|
|
|
|
for.end: ; preds = %for.cond
|
|
ret void
|
|
}
|
|
|
|
; Global whose address serves as the upper bound in the exit comparison of
; @implied_wrap_predicate.
@h = global i64 0
|
|
|
|
; Loop with a narrow i16 counter %iv and an i64 index %iv.ext that, after the
; first iteration, is the zero-extension of the incremented i16 counter.
; Each iteration stores 0 to %A[%iv.ext] and %C[%iv.ext]; the loop exits once
; the address of %A[%iv.ext.next] is unsigned-greater than @h. %B is unused.
; The autogenerated assertions below expect a minimum-iteration check, a
; vector.scevcheck block and a vector.memcheck block ahead of the vector loop.
define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) {
|
|
; CHECK-LABEL: define void @implied_wrap_predicate(
|
|
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[A3:%.*]] = ptrtoint ptr [[A]] to i64
|
|
; CHECK-NEXT: [[C2:%.*]] = ptrtoint ptr [[C]] to i64
|
|
; CHECK-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[A3]], 16
|
|
; CHECK-NEXT: [[UMAX4:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 add (i64 ptrtoint (ptr @h to i64), i64 1))
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMAX4]], -9
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[A3]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
|
|
; CHECK: [[VECTOR_SCEVCHECK]]:
|
|
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[A1]], 16
|
|
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP5]], i64 add (i64 ptrtoint (ptr @h to i64), i64 1))
|
|
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[UMAX]], -9
|
|
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], [[A1]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP7]], 3
|
|
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i16
|
|
; CHECK-NEXT: [[TMP10:%.*]] = add i16 2, [[TMP9]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i16 [[TMP10]], 2
|
|
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP8]], 65535
|
|
; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
|
|
; CHECK-NEXT: br i1 [[TMP13]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
|
|
; CHECK: [[VECTOR_MEMCHECK]]:
|
|
; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[C2]], [[A3]]
|
|
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], 32
|
|
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
|
|
; CHECK-NEXT: [[TMP15:%.*]] = add i16 1, [[DOTCAST]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = add i64 1, [[N_VEC]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
|
|
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
|
; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP17]], align 4
|
|
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[C]], i64 [[OFFSET_IDX]]
|
|
; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP18]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
; Both inductions start at 1; %iv counts in i16 while %iv.ext is the i64 value
; actually used to index %A and %C.
%iv = phi i16 [ 1, %entry ], [ %iv.next, %loop ]
|
|
%iv.ext = phi i64 [ 1, %entry ], [ %iv.ext.next, %loop ]
|
|
%gep.A = getelementptr i64, ptr %A, i64 %iv.ext
|
|
store i64 0, ptr %gep.A
|
|
%gep.C = getelementptr i64, ptr %C, i64 %iv.ext
|
|
store i64 0, ptr %gep.C
|
|
%iv.next = add i16 %iv, 1
|
|
; The wide index for the next iteration is derived from the narrow counter.
%iv.ext.next = zext i16 %iv.next to i64
|
|
; Exit test is a pointer comparison: leave the loop once the address of the
; next element of %A lies above @h (unsigned).
%gep = getelementptr i64, ptr %A, i64 %iv.ext.next
|
|
%cmp = icmp ugt ptr %gep, @h
|
|
br i1 %cmp, label %exit, label %loop
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; External predicate with no body here; @no_signed_wrap_iv_via_btc calls it in
; its outer loop to decide whether to run the inner loop again or return.
declare i1 @cond()
|
|
|
|
; Test case for https://github.com/llvm/llvm-project/issues/131281.
|
|
; %add2 is known to not wrap via BTC.
; The inner loop stores 0 to %dst[sext((%N - 99) + %iv)] and returns to %outer
; once (%N - 100) + (%iv + 1) is signed-greater than %N.
; The autogenerated assertions below expect no vector.scevcheck block: the
; loop preheader branches on the minimum-iteration check directly to either
; the scalar or the vector preheader.
define void @no_signed_wrap_iv_via_btc(ptr %dst, i32 %N) mustprogress {
|
|
; CHECK-LABEL: define void @no_signed_wrap_iv_via_btc(
|
|
; CHECK-SAME: ptr [[DST:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -100
|
|
; CHECK-NEXT: [[SUB4:%.*]] = add i32 [[N]], -99
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB4]], i32 [[TMP0]])
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 100
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[N]]
|
|
; CHECK-NEXT: br label %[[OUTER:.*]]
|
|
; CHECK: [[OUTER_LOOPEXIT:.*]]:
|
|
; CHECK-NEXT: br label %[[OUTER]]
|
|
; CHECK: [[OUTER]]:
|
|
; CHECK-NEXT: [[C:%.*]] = call i1 @cond()
|
|
; CHECK-NEXT: br i1 [[C]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]]
|
|
; CHECK: [[LOOP_PREHEADER]]:
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUB4]], [[INDEX]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
|
|
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP4]]
|
|
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[OUTER_LOOPEXIT]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
;
|
|
entry:
|
|
; Loop-invariant offsets: %sub = %N - 100 feeds the exit test, %sub4 = %N - 99
; feeds the store index.
%sub = add i32 %N, -100
|
|
%sub4 = add i32 %N, -99
|
|
br label %outer
|
|
|
|
outer:
|
|
; Re-evaluated on every pass through the outer loop; false leaves the function.
%c = call i1 @cond()
|
|
br i1 %c, label %loop, label %exit
|
|
|
|
loop:
|
|
%iv = phi i32 [ 0, %outer ], [ %inc, %loop ]
|
|
; %add2 is computed in i32 and sign-extended before indexing %dst.
%add2 = add i32 %sub4, %iv
|
|
%add.ext = sext i32 %add2 to i64
|
|
%gep.dst = getelementptr i32, ptr %dst, i64 %add.ext
|
|
store i32 0, ptr %gep.dst, align 4
|
|
%inc = add i32 %iv, 1
|
|
; Exit condition is a signed compare of %sub + %inc against %N.
%add = add i32 %sub, %inc
|
|
%ec = icmp sgt i32 %add, %N
|
|
br i1 %ec, label %outer, label %loop
|
|
|
|
exit:
|
|
ret void
|
|
}
|