llvm-project/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll
Florian Hahn 40304d8fef
Reapply "[VPlan] Remove manual region removal when simplifying for VF and UF. (#181252)" (#188589)
This reverts commit e30f9c19464bcf1bf1e9f69b63884fb78ad2d05d.

Re-land, now that the reported crash causing the revert has been fixed
as part of 77fb84889 (#187504).

Original message:

Replace manual region dissolution code in
simplifyBranchConditionForVFAndUF with using general
removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates
a (BranchOnCond true) or updates BranchOnTwoConds.

The loop then gets automatically removed by running removeBranchOnConst.

This removes a bunch of special logic to handle header phi replacements
and CFG updates. With the new code, there's no restriction on what kind
of header phi recipes the loop contains.

Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is
technically unrelated, but I could not find an independent test that
would be impacted.

The code to deal with epilogue resume values now needs updating, because
we may simplify a reduction directly to the start value.

PR: https://github.com/llvm/llvm-project/pull/181252
2026-03-26 10:14:10 +00:00

855 lines
40 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -S < %s -p loop-vectorize -force-vector-width=4 | FileCheck %s
;
; The run line above applies the loop-vectorize pass with the vector width
; forced to 4 and verifies the printed IR against the generated assertions.
;
; Zero-initialized 64-byte arrays that the test loops load from.
@A = global [64 x i8] zeroinitializer
@B = global [64 x i8] zeroinitializer
@C = global [64 x i8] zeroinitializer
@D = global [64 x i8] zeroinitializer
; The header branches on (l.A < 0) to block.a or block.b. Each side compares
; l.A with a load from a different array (@B or @C) and leaves the loop on
; equality. The value returned differs per exit: the zero-extended @B load
; from block.a, the induction variable from block.b, and 0 when the latch
; exit is taken once %iv.next reaches 64.
define i64 @diamond_with_2_early_exits() {
; CHECK-LABEL: define i64 @diamond_with_2_early_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP10]]
; CHECK-NEXT: [[CMP_A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP_A]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP10]], i1 false)
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP14]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i64> [[TMP7]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP16]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
%branch.cond = icmp slt i8 %l.A, 0
br i1 %branch.cond, label %block.a, label %block.b
block.a:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
; %ext is only used as the live-out value of the exit from this block.
%ext = zext i8 %l.B to i64
%cmp.a = icmp eq i8 %l.A, %l.B
br i1 %cmp.a, label %loop.end, label %loop.latch
block.b:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp.b = icmp eq i8 %l.A, %l.C
br i1 %cmp.b, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
; One incoming value per exiting block: block.a, block.b and the latch.
%retval = phi i64 [ %ext, %block.a ], [ %iv, %block.b ], [ 0, %loop.latch ]
ret i64 %retval
}
; A two-step condition chain (l.A < -42, then l.A < 42) routes each iteration
; to exactly one of block.a, block.b or block.c. Each of those compares l.A
; with a load from @B, @C or @D respectively and leaves the loop on equality,
; returning the constant 1, 2 or 3. The latch exit returns 0.
define i64 @three_early_exits() {
; CHECK-LABEL: define i64 @three_early_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[CHECK_B:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 -42)
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 42)
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP8]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]]
; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP17]]
; CHECK-NEXT: [[COND_A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[COND_A]], label %[[BLOCK_A:.*]], label %[[CHECK_B]]
; CHECK: [[CHECK_B]]:
; CHECK-NEXT: br i1 [[TMP20]], label %[[BLOCK_B:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[BLOCK_B]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false)
; CHECK-NEXT: [[CMP_B:%.*]] = extractelement <4 x i1> [[TMP8]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[CMP_B]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP12]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[LOOP_END]] ], [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_2]] ], [ 0, %[[BLOCK_B]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
%cond.a = icmp slt i8 %l.A, -42
br i1 %cond.a, label %block.a, label %check.b
check.b:
; Reached only when %cond.a is false; picks between block.b and block.c.
%cond.b = icmp slt i8 %l.A, 42
br i1 %cond.b, label %block.b, label %block.c
block.a:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%cmp.a = icmp eq i8 %l.A, %l.B
br i1 %cmp.a, label %loop.end, label %loop.latch
block.b:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp.b = icmp eq i8 %l.A, %l.C
br i1 %cmp.b, label %loop.end, label %loop.latch
block.c:
%gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
%l.D = load i8, ptr %gep.D, align 1
%cmp.c = icmp eq i8 %l.A, %l.D
br i1 %cmp.c, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
; A distinct constant identifies which of the four exiting blocks was taken.
%retval = phi i64 [ 1, %block.a ], [ 2, %block.b ], [ 3, %block.c ], [ 0, %loop.latch ]
ret i64 %retval
}
; Outer diamond on (l.A < 0). Its true side (block.a) holds an inner diamond
; on (l.B < 0) whose two arms, block.a1 and block.a2, can each leave the loop
; and otherwise rejoin at join.a. The false side of the outer diamond
; (block.b) has its own exit. Exits return 1, 2 and 3; the latch exit
; returns 0.
define i64 @nested_diamond_inner_exits() {
; CHECK-LABEL: define i64 @nested_diamond_inner_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_A2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD2]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]]
; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP17]]
; CHECK-NEXT: [[INNER_COND:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[INNER_COND]], label %[[BLOCK_A1:.*]], label %[[BLOCK_A2]]
; CHECK: [[BLOCK_A2]]:
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_LATCH:.*]]
; CHECK: [[BLOCK_A1]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false)
; CHECK-NEXT: [[CMP_A1:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[CMP_A1]], label %[[LOOP_END:.*]], label %[[JOIN_A:.*]]
; CHECK: [[JOIN_A]]:
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP12]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[BLOCK_B:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[BLOCK_B]]:
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[LOOP_END]] ], [ 2, %[[BLOCK_B]] ], [ 1, %[[VECTOR_EARLY_EXIT_2]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
%outer.cond = icmp slt i8 %l.A, 0
br i1 %outer.cond, label %block.a, label %block.b
block.a:
; Head of the inner diamond; %l.B is both the inner branch condition's
; operand and the value compared in block.a1.
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%inner.cond = icmp slt i8 %l.B, 0
br i1 %inner.cond, label %block.a1, label %block.a2
block.a1:
%cmp.a1 = icmp eq i8 %l.A, %l.B
br i1 %cmp.a1, label %loop.end, label %join.a
block.a2:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp.a2 = icmp eq i8 %l.A, %l.C
br i1 %cmp.a2, label %loop.end, label %join.a
join.a:
; Merge point of the inner diamond's non-exiting paths.
br label %loop.latch
block.b:
%gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
%l.D = load i8, ptr %gep.D, align 1
%cmp.b = icmp eq i8 %l.A, %l.D
br i1 %cmp.b, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
%retval = phi i64 [ 1, %block.a1 ], [ 2, %block.a2 ], [ 3, %block.b ], [ 0, %loop.latch ]
ret i64 %retval
}
; The header either enters block.a (when l.A < 0) or skips straight to the
; latch. block.a, block.b and block.c form a chain: each compares l.A with a
; load from @B, @C or @D, leaves the loop on equality (returning 1, 2 or 3)
; and otherwise falls through to the next block; block.c falls through to
; the latch. The latch exit returns 0.
define i64 @chain_of_3_exits() {
; CHECK-LABEL: define i64 @chain_of_3_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_C:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> splat (i1 true), <4 x i1> [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = freeze <4 x i1> [[TMP12]]
; CHECK-NEXT: [[CMP_B:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP_B]], label %[[LOOP_END:.*]], label %[[BLOCK_C]]
; CHECK: [[BLOCK_C]]:
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false)
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP7]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP17]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[VECTOR_EARLY_EXIT_2]] ], [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
%cond.a = icmp slt i8 %l.A, 0
; The false edge bypasses the whole chain of exiting blocks.
br i1 %cond.a, label %block.a, label %loop.latch
block.a:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%cmp.a = icmp eq i8 %l.A, %l.B
br i1 %cmp.a, label %loop.end, label %block.b
block.b:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp.b = icmp eq i8 %l.A, %l.C
br i1 %cmp.b, label %loop.end, label %block.c
block.c:
%gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
%l.D = load i8, ptr %gep.D, align 1
%cmp.c = icmp eq i8 %l.A, %l.D
br i1 %cmp.c, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
%retval = phi i64 [ 1, %block.a ], [ 2, %block.b ], [ 3, %block.c ], [ 0, %loop.latch ]
ret i64 %retval
}
; Two diamonds in sequence, with an exit in every arm. The first diamond
; branches on (l.A < 0); its arms compare l.A with loads from @B and @C and
; the non-exiting paths merge at branch2. The second diamond branches on
; (l.D < 0); its arms leave the loop on (l.A == l.D) and (l.A != l.D)
; respectively. The four exits return 1..4; the latch exit returns 0.
define i64 @four_exits_2x2_diamond() {
; CHECK-LABEL: define i64 @four_exits_2x2_diamond() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BRANCH1_A:.*]]
; CHECK: [[BRANCH1_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BRANCH2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD3]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP8]]
; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP13]]
; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]]
; CHECK-NEXT: [[TMP19:%.*]] = freeze <4 x i1> [[TMP18]]
; CHECK-NEXT: [[CMP1A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP19]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP1A]], label %[[LOOP_END:.*]], label %[[BRANCH2]]
; CHECK: [[BRANCH2]]:
; CHECK-NEXT: br i1 [[TMP21]], label %[[BRANCH2_A:.*]], label %[[BRANCH1_A]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[BRANCH2_A]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP18]], i1 false)
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[BRANCH2_B:.*]]
; CHECK: [[BRANCH2_B]]:
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP23]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP13]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP24]], label %[[VECTOR_EARLY_EXIT_2:.*]], label %[[VECTOR_EARLY_EXIT_3:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_3]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 4, %[[VECTOR_EARLY_EXIT_2]] ], [ 3, %[[VECTOR_EARLY_EXIT_3]] ], [ 2, %[[VECTOR_EARLY_EXIT_0]] ], [ 1, %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[BRANCH2_A]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
%cond1 = icmp slt i8 %l.A, 0
br i1 %cond1, label %branch1.a, label %branch1.b
branch1.a:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%cmp1a = icmp eq i8 %l.A, %l.B
br i1 %cmp1a, label %loop.end, label %branch2
branch1.b:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp1b = icmp eq i8 %l.A, %l.C
br i1 %cmp1b, label %loop.end, label %branch2
branch2:
; Merge of the first diamond and head of the second one.
%gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
%l.D = load i8, ptr %gep.D, align 1
%cond2 = icmp slt i8 %l.D, 0
br i1 %cond2, label %branch2.a, label %branch2.b
branch2.a:
%cmp2a = icmp eq i8 %l.A, %l.D
br i1 %cmp2a, label %loop.end, label %loop.latch
branch2.b:
; Note the inverted predicate (ne) compared to branch2.a (eq).
%cmp2b = icmp ne i8 %l.A, %l.D
br i1 %cmp2b, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
%retval = phi i64 [ 1, %branch1.a ], [ 2, %branch1.b ], [ 3, %branch2.a ], [ 4, %branch2.b ], [ 0, %loop.latch ]
ret i64 %retval
}
; Diamond where neither branch exits directly. Both sides of the diamond
; merge to a block that then has an exit with a phi-dependent live-out value.
; In %merge, both the live-out (%val) and the operand of the exit comparison
; (%ld.for.cmp) are phis that pick between the @B-based and @C-based values
; from the two sides of the diamond. The latch exit returns 0.
define i64 @diamond_merge_then_exit_with_phi_liveout() {
; CHECK-LABEL: define i64 @diamond_merge_then_exit_with_phi_liveout() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[WIDE_LOAD2]], <4 x i8> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i64>
; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i64>
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP9]], <4 x i64> [[TMP8]]
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[PREDPHI3]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], %[[VECTOR_EARLY_EXIT]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
%cond = icmp slt i8 %l.A, 0
br i1 %cond, label %left, label %right
left:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%val.left = zext i8 %l.B to i64
br label %merge
right:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%val.right = zext i8 %l.C to i64
br label %merge
merge:
; %val feeds only the exit phi; %ld.for.cmp feeds only the exit condition.
%val = phi i64 [ %val.left, %left ], [ %val.right, %right ]
%ld.for.cmp = phi i8 [ %l.B, %left ], [ %l.C, %right ]
%cmp = icmp eq i8 %l.A, %ld.for.cmp
br i1 %cmp, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
%retval = phi i64 [ %val, %merge ], [ 0, %loop.latch ]
ret i64 %retval
}
; Diamond where both exit conditions compare l.A against l.B and l.C
; respectively. If l.B == l.C at runtime, both conditions could be true
; for the same lane, but the masking with cond/NOT cond prevents both
; from firing simultaneously. Tests that the predication correctly
; disambiguates the exits.
; All three loads (@A, @B, @C) happen unconditionally in the header, and both
; early exits return the induction variable; the latch exit returns 0.
define i64 @diamond_exits_overlapping_conditions() {
; CHECK-LABEL: define i64 @diamond_exits_overlapping_conditions() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_B:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i1> splat (i1 true), <4 x i1> [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP9]]
; CHECK-NEXT: [[COND:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[COND]], label %[[BLOCK_A:.*]], label %[[BLOCK_B]]
; CHECK: [[BLOCK_B]]:
; CHECK-NEXT: br i1 [[TMP12]], label %[[LOOP_LATCH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP9]], i1 false)
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[LOOP_END:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP15]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP14]], %[[LOOP_END]] ], [ 0, %[[LOOP_LATCH]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cond = icmp slt i8 %l.A, 0
br i1 %cond, label %block.a, label %block.b
block.a:
; Taken only when %cond is true.
%cmp.a = icmp eq i8 %l.A, %l.B
br i1 %cmp.a, label %loop.end, label %loop.latch
block.b:
; Taken only when %cond is false.
%cmp.b = icmp eq i8 %l.A, %l.C
br i1 %cmp.b, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
; Both early exits produce the same live-out value, %iv.
%retval = phi i64 [ %iv, %block.a ], [ %iv, %block.b ], [ 0, %loop.latch ]
ret i64 %retval
}
; Block C is reachable from both an exit-fallthrough path and a direct branch.
; block.a has an exit; if it doesn't exit, it falls through to block.c.
; The header's false branch goes directly to block.c.
; block.c then has its own exit.
define i64 @exit_from_merge_of_exit_fallthrough_and_bypass() {
; CHECK-LABEL: define i64 @exit_from_merge_of_exit_fallthrough_and_bypass() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
; CHECK-NEXT: [[CMP_C:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP_C]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 false)
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP11]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
; Header: load A[iv]; a negative value (signed compare) takes the path through
; block.a, anything else bypasses block.a and goes straight to block.c.
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
%cond = icmp slt i8 %l.A, 0
br i1 %cond, label %block.a, label %block.c
; First early exit: leave the loop if A[iv] == B[iv], otherwise fall through
; to block.c.
block.a:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%cmp.a = icmp eq i8 %l.A, %l.B
br i1 %cmp.a, label %loop.end, label %block.c
; Merge point of the block.a fall-through and the header bypass. Second early
; exit: leave the loop if A[iv] == C[iv].
block.c:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp.c = icmp eq i8 %l.A, %l.C
br i1 %cmp.c, label %loop.end, label %loop.latch
; Latch: advance iv by 1 and keep looping until iv.next reaches 64.
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
; Result identifies which exit was taken: 1 for block.a, 2 for block.c, and 0
; when the loop ran to completion through the latch.
loop.end:
%retval = phi i64 [ 1, %block.a ], [ 2, %block.c ], [ 0, %loop.latch ]
ret i64 %retval
}
; When the else branch is speculatively executed for iv < 2, `sub nuw` wraps
; producing poison. This poison condition is processed first in RPO.
; Test for https://github.com/llvm/llvm-project/issues/187061.
define i32 @diamond_exit_poison_from_speculated_branch() {
; CHECK-LABEL: define i32 @diamond_exit_poison_from_speculated_branch() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> splat (i1 true)
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]])
; CHECK-NEXT: br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> splat (i1 true), i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 1, i32 2>, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[UNREACHABLE_EXIT:.*]]
; CHECK: [[UNREACHABLE_EXIT]]:
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ]
; CHECK-NEXT: ret i32 [[RETVAL]]
;
entry:
br label %loop.header
; Header of a diamond: iv values 0 and 1 take the then side, larger values take
; the else side.
loop.header:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%cmp = icmp ult i32 %iv, 2
br i1 %cmp, label %then, label %else
; then side: produces iv + 10 and has no exit of its own.
then:
%lo.val = add i32 %iv, 10
br label %loop.exiting
; else side: in the scalar loop this block only runs for iv >= 2, so
; `sub nuw` does not wrap here. It holds the exit to unreachable.exit, taken
; when 1 << (iv - 2) equals 999.
else:
%sub = sub nuw i32 %iv, 2
%shl = shl nuw i32 1, %sub
%trap.cond = icmp eq i32 %shl, 999
br i1 %trap.cond, label %unreachable.exit, label %loop.exiting
; Exit block that traps instead of returning.
unreachable.exit:
call void @llvm.trap()
unreachable
; Merge of the diamond. Second exit: leave the loop with the merged value when
; it is unsigned-less-than 12.
loop.exiting:
%val = phi i32 [ %lo.val, %then ], [ %shl, %else ]
%found.cond = icmp ult i32 %val, 12
br i1 %found.cond, label %loop.end, label %loop.latch
; Latch: advance iv by 1 and leave the loop once iv.next reaches 4.
loop.latch:
%iv.next = add nuw nsw i32 %iv, 1
%done = icmp eq i32 %iv.next, 4
br i1 %done, label %loop.end, label %loop.header
; Returns the merged value on the early exit from loop.exiting, or -1 when the
; loop finishes through the latch.
loop.end:
%retval = phi i32 [ %val, %loop.exiting ], [ -1, %loop.latch ]
ret i32 %retval
}
; Same as above, but the poison exit condition (trap.cond from the speculated
; else branch) comes second in RPO. The first processed exit is from the then
; branch, which precedes the else branch in RPO.
define i32 @diamond_exit_poison_cond_second() {
; CHECK-LABEL: define i32 @diamond_exit_poison_cond_second() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> <i1 true, i1 false, i1 false, i1 false>
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]])
; CHECK-NEXT: br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 true, i1 false, i1 false, i1 false>, i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 12, i32 13>, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[UNREACHABLE_EXIT:.*]]
; CHECK: [[UNREACHABLE_EXIT]]:
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ]
; CHECK-NEXT: ret i32 [[RETVAL]]
;
entry:
br label %loop.header
; Header of a diamond: iv values 0 and 1 take the then side, larger values take
; the else side. Unlike the previous test, each side has its own exit and
; both sides branch directly to the latch.
loop.header:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%cmp = icmp ult i32 %iv, 2
br i1 %cmp, label %then, label %else
; then side: exits to loop.end with iv + 10 when that value is
; unsigned-less-than 11.
then:
%val = add i32 %iv, 10
%found.cond = icmp ult i32 %val, 11
br i1 %found.cond, label %loop.end, label %loop.latch
; else side: in the scalar loop this block only runs for iv >= 2. It holds the
; exit to unreachable.exit, taken when 1 << (iv - 2) equals 999.
else:
; sub nuw produces poison when speculatively executed for iv < 2.
%sub = sub nuw i32 %iv, 2
%shl = shl nuw i32 1, %sub
%trap.cond = icmp eq i32 %shl, 999
br i1 %trap.cond, label %unreachable.exit, label %loop.latch
; Exit block that traps instead of returning.
unreachable.exit:
call void @llvm.trap()
unreachable
; Latch: advance iv by 1 and leave the loop once iv.next reaches 4.
loop.latch:
%iv.next = add nuw nsw i32 %iv, 1
%done = icmp eq i32 %iv.next, 4
br i1 %done, label %loop.end, label %loop.header
; Returns iv + 10 on the early exit from the then block, or -1 when the loop
; finishes through the latch.
loop.end:
%retval = phi i32 [ %val, %then ], [ -1, %loop.latch ]
ret i32 %retval
}
; Trap intrinsic called from the unreachable.exit blocks of the tests above.
declare void @llvm.trap()