Mel Chen 8c6658aca6
[VPlan] Sink recipes from the vector loop region in licm. (#168031)
When a recipe can be safely sunk and all of its users are outside the
vector loop region in the same dedicated exit block, the recipe does not
need to be executed on every iteration.
This patch extends the VPlan-based LICM (Loop Invariant Code Motion) to
also sink such recipes from the vector loop region into the exit block.
This reduces redundant computation and improves cost model accuracy.

TODO: Support nested loop sinking
TODO: Support sinking `VPReplicateRecipe` (requires `replicateByVF`
fixes)
TODO: Support recipes with multiple defined values (e.g., interleaved
loads)
TODO: Clone recipes without users to all exit blocks
TODO: Support PHI node users by checking incoming value blocks
TODO: Support sinking when users are in multiple blocks
TODO: Clone recipes when users are on multiple exit paths

Co-authored-by: Luke Lau <luke@igalia.com>

---------

Co-authored-by: Luke Lau <luke@igalia.com>
Co-authored-by: Luke Lau <luke_lau@icloud.com>
2026-02-03 07:57:15 +00:00

806 lines
41 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name unnamed --version 5
; RUN: opt -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
@f = common global i32 0, align 4
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
@c = common global i32 0, align 4
@a = common global i32 0, align 4
@b = common global i32 0, align 4
@e = common global i32 0, align 4
; It has a value that is used outside of the loop
; and is not a recognized reduction variable "tmp17".
; However, tmp17 is a non-header phi which is an allowed exit.
define i32 @test1() {
; CHECK-LABEL: define i32 @test1() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]]
; CHECK: [[_LR_PH_I]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I1:.*:]]
; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT: [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
bb:
%b.promoted = load i32, ptr @b, align 4
br label %.lr.ph.i
.lr.ph.i:
%tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
%tmp2 = icmp sgt i32 %tmp8, 10
br i1 %tmp2, label %bb16, label %bb10
bb10:
br label %bb16
bb16:
%tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
%tmp18 = add nsw i32 %tmp8, 1
%tmp19 = icmp slt i32 %tmp18, 4
br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
f1.exit.loopexit:
%.lcssa = phi i32 [ %tmp17, %bb16 ]
ret i32 %.lcssa
}
; non-hdr phi depends on header phi.
define i32 @test2() {
; CHECK-LABEL: define i32 @test2() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> splat (i32 1), <2 x i32> [[VEC_IND]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]]
; CHECK: [[_LR_PH_I]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I1:.*:]]
; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ [[UNNAMEDTMP8]], %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT: [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
bb:
%b.promoted = load i32, ptr @b, align 4
br label %.lr.ph.i
.lr.ph.i:
%tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
%tmp2 = icmp sgt i32 %tmp8, 10
br i1 %tmp2, label %bb16, label %bb10
bb10:
br label %bb16
bb16:
%tmp17 = phi i32 [ %tmp8, %bb10 ], [ 1, %.lr.ph.i ]
%tmp18 = add nsw i32 %tmp8, 1
%tmp19 = icmp slt i32 %tmp18, 4
br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
f1.exit.loopexit:
%.lcssa = phi i32 [ %tmp17, %bb16 ]
ret i32 %.lcssa
}
; more than 2 incoming values for tmp17 phi that is used outside loop.
define i32 @test3(i32 %N) {
; CHECK-LABEL: define i32 @test3(
; CHECK-SAME: i32 [[N:%.*]]) {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP4:%.*]] = icmp sle <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT: [[TMP5:%.*]] = icmp sle <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP5]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 2)
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[PREDPHI]], <2 x i32> splat (i32 1)
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[PREDPHI1]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]]
; CHECK: [[_LR_PH_I1]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I:.*:]]
; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[UNNAMEDBB12:.*]], label %[[BB16]]
; CHECK: [[UNNAMEDBB12]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ], [ 2, %[[UNNAMEDBB12]] ]
; CHECK-NEXT: [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
bb:
%b.promoted = load i32, ptr @b, align 4
br label %.lr.ph.i
.lr.ph.i:
%tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
%tmp2 = icmp sgt i32 %tmp8, 10
br i1 %tmp2, label %bb16, label %bb10
bb10:
%cmp = icmp sgt i32 %tmp8, %N
br i1 %cmp, label %bb12, label %bb16
bb12:
br label %bb16
bb16:
%tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ], [ 2, %bb12 ]
%tmp18 = add nsw i32 %tmp8, 1
%tmp19 = icmp slt i32 %tmp18, 4
br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
f1.exit.loopexit:
%.lcssa = phi i32 [ %tmp17, %bb16 ]
ret i32 %.lcssa
}
; more than one incoming value for outside user: %.lcssa
define i32 @test4(i32 %N) {
; CHECK-LABEL: define i32 @test4(
; CHECK-SAME: i32 [[N:%.*]]) {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[ICMP:%.*]] = icmp slt i32 [[B_PROMOTED]], [[N]]
; CHECK-NEXT: br i1 [[ICMP]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[DOTLR_PH_I_PREHEADER:.*]]
; CHECK: [[_LR_PH_I_PREHEADER:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]]
; CHECK: [[_LR_PH_I]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[DOTLR_PH_I_PREHEADER]] ]
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I1:.*:]]
; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT: [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[F1_EXIT_LOOPEXIT_LOOPEXIT]]:
; CHECK-NEXT: [[TMP17_LCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label %[[F1_EXIT_LOOPEXIT]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ 2, %[[BB]] ], [ [[TMP17_LCSSA]], %[[F1_EXIT_LOOPEXIT_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
bb:
%b.promoted = load i32, ptr @b, align 4
%icmp = icmp slt i32 %b.promoted, %N
br i1 %icmp, label %f1.exit.loopexit, label %.lr.ph.i
.lr.ph.i:
%tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
%tmp2 = icmp sgt i32 %tmp8, 10
br i1 %tmp2, label %bb16, label %bb10
bb10:
br label %bb16
bb16:
%tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
%tmp18 = add nsw i32 %tmp8, 1
%tmp19 = icmp slt i32 %tmp18, 4
br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
f1.exit.loopexit:
%.lcssa = phi i32 [ %tmp17, %bb16 ], [ 2, %bb ]
ret i32 %.lcssa
}
; non hdr phi that depends on reduction and is used outside the loop.
; reduction phis are only allowed to have bump or reduction operations as the inside user, so we should
; not vectorize this.
define i32 @reduction_sum(i32 %n, ptr noalias nocapture %A, ptr noalias nocapture %B) nounwind uwtable readonly noinline ssp {
; CHECK-LABEL: define i32 @reduction_sum(
; CHECK-SAME: i32 [[N:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[C1]], label %[[HEADER_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]]
; CHECK: [[HEADER_PREHEADER]]:
; CHECK-NEXT: br label %[[HEADER:.*]]
; CHECK: [[HEADER]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB16:.*]] ], [ 0, %[[HEADER_PREHEADER]] ]
; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[C9:%.*]], %[[BB16]] ], [ 0, %[[HEADER_PREHEADER]] ]
; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[C3:%.*]] = load i32, ptr [[C2]], align 4
; CHECK-NEXT: [[C4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[C5:%.*]] = load i32, ptr [[C4]], align 4
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[SUM_02]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ [[SUM_02]], %[[UNNAMEDBB10]] ], [ 1, %[[HEADER]] ]
; CHECK-NEXT: [[C6:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[C7:%.*]] = add i32 [[SUM_02]], [[C6]]
; CHECK-NEXT: [[C8:%.*]] = add i32 [[C7]], [[C3]]
; CHECK-NEXT: [[C9]] = add i32 [[C8]], [[C5]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]], label %[[HEADER]]
; CHECK: [[__CRIT_EDGE_LOOPEXIT:.*:]]
; CHECK-NEXT: [[TMP17_LCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ]
; CHECK-NEXT: [[C9_LCSSA:%.*]] = phi i32 [ [[C9]], %[[BB16]] ]
; CHECK-NEXT: br [[DOT_CRIT_EDGE]]
; CHECK: [[__CRIT_EDGE:.*:]]
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[C9_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT: [[NONHDR_LCSSA:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[TMP17_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
entry:
%c1 = icmp sgt i32 %n, 0
br i1 %c1, label %header, label %._crit_edge
header: ; preds = %0, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %bb16 ], [ 0, %entry ]
%sum.02 = phi i32 [ %c9, %bb16 ], [ 0, %entry ]
%c2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%c3 = load i32, ptr %c2, align 4
%c4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%c5 = load i32, ptr %c4, align 4
%tmp2 = icmp sgt i32 %sum.02, 10
br i1 %tmp2, label %bb16, label %bb10
bb10:
br label %bb16
bb16:
%tmp17 = phi i32 [ %sum.02, %bb10 ], [ 1, %header ]
%c6 = trunc i64 %indvars.iv to i32
%c7 = add i32 %sum.02, %c6
%c8 = add i32 %c7, %c3
%c9 = add i32 %c8, %c5
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %._crit_edge, label %header
._crit_edge: ; preds = %.lr.ph, %0
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %c9, %bb16 ]
%nonhdr.lcssa = phi i32 [ 1, %entry], [ %tmp17, %bb16 ]
ret i32 %sum.0.lcssa
}
; invalid cyclic dependency with header phi iv, which prevents iv from being
; recognized as induction var.
; cannot vectorize.
define i32 @cyclic_dep_with_indvar() {
; CHECK-LABEL: define i32 @cyclic_dep_with_indvar() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I:.*:]]
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[BB16:.*]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[IV]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ [[IV]], %[[DOTLR_PH_I]] ]
; CHECK-NEXT: [[IVNEXT]] = add nsw i32 [[UNNAMEDTMP17]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[IVNEXT]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT:.*]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ]
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
bb:
%b.promoted = load i32, ptr @b, align 4
br label %.lr.ph.i
.lr.ph.i:
%iv = phi i32 [ %ivnext, %bb16 ], [ %b.promoted, %bb ]
%tmp2 = icmp sgt i32 %iv, 10
br i1 %tmp2, label %bb16, label %bb10
bb10:
br label %bb16
bb16:
%tmp17 = phi i32 [ 0, %bb10 ], [ %iv, %.lr.ph.i ]
%ivnext = add nsw i32 %tmp17, 1
%tmp19 = icmp slt i32 %ivnext, 4
br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
f1.exit.loopexit:
%.lcssa = phi i32 [ %tmp17, %bb16 ]
ret i32 %.lcssa
}
; non-reduction phi 'tmp17' used outside loop has cyclic dependence with %x.05 phi
; cannot vectorize.
define i32 @not_valid_reduction(i32 %n, ptr noalias nocapture %A) nounwind uwtable readonly {
; CHECK-LABEL: define i32 @not_valid_reduction(
; CHECK-SAME: i32 [[N:%.*]], ptr noalias captures(none) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP4]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LATCH:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[UNNAMEDTMP17:%.*]], %[[LATCH]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[UNNAMEDTMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i64 [[INDVARS_IV]], 10
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X_05]], [[UNNAMEDTMP0]]
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[UNNAMEDBB16:.*]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[UNNAMEDBB16]]
; CHECK: [[UNNAMEDBB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17]] = phi i32 [ 1, %[[UNNAMEDBB10]] ], [ [[SUB]], %[[FOR_BODY]] ]
; CHECK-NEXT: br label %[[LATCH]]
; CHECK: [[LATCH]]:
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT: [[TMP17_LCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[LATCH]] ]
; CHECK-NEXT: br label %[[FOR_END]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP17_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[X_0_LCSSA]]
;
entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
%x.05 = phi i32 [ %tmp17, %latch ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%tmp0 = load i32, ptr %arrayidx, align 4
%tmp2 = icmp sgt i64 %indvars.iv, 10
%sub = sub nsw i32 %x.05, %tmp0
br i1 %tmp2, label %bb16, label %bb10
bb10:
br label %bb16
bb16:
%tmp17 = phi i32 [ 1, %bb10 ], [ %sub, %for.body ]
br label %latch
latch:
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%x.0.lcssa = phi i32 [ 0, %entry ], [ %tmp17 , %latch ]
ret i32 %x.0.lcssa
}
define i8 @outside_user_non_phi() {
; CHECK-LABEL: define i8 @outside_user_non_phi() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[PREDPHI]] to <2 x i8>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP4]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]]
; CHECK: [[_LR_PH_I]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I1:.*:]]
; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT: [[TMP17_TRUNC:%.*]] = trunc i32 [[UNNAMEDTMP17]] to i8
; CHECK-NEXT: [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i8 [ [[TMP17_TRUNC]], %[[BB16]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i8 [[DOTLCSSA]]
;
bb:
%b.promoted = load i32, ptr @b, align 4
br label %.lr.ph.i
.lr.ph.i:
%tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
%tmp2 = icmp sgt i32 %tmp8, 10
br i1 %tmp2, label %bb16, label %bb10
bb10:
br label %bb16
bb16:
%tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
%tmp17.trunc = trunc i32 %tmp17 to i8
%tmp18 = add nsw i32 %tmp8, 1
%tmp19 = icmp slt i32 %tmp18, 4
br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
f1.exit.loopexit:
%.lcssa = phi i8 [ %tmp17.trunc, %bb16 ]
ret i8 %.lcssa
}
define i32 @no_vectorize_reduction_with_outside_use(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable readonly {
; CHECK-LABEL: define i32 @no_vectorize_reduction_with_outside_use(
; CHECK-SAME: i32 [[N:%.*]], ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[OR]] = or i32 [[ADD]], [[RESULT_08]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP1]], %[[FOR_BODY]] ]
; CHECK-NEXT: br label %[[FOR_END]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTLCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
%cmp7 = icmp sgt i32 %n, 0
br i1 %cmp7, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%1 = load i32, ptr %arrayidx2, align 4
%add = add nsw i32 %1, %0
%or = or i32 %add, %result.08
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%result.0.lcssa = phi i32 [ 0, %entry ], [ %1, %for.body ]
ret i32 %result.0.lcssa
}
; vectorize c[i] = a[i] + b[i] loop where result of c[i] is used outside the
; loop
define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) {
; CHECK-LABEL: define i32 @sum_arrays_outside_use(
; CHECK-SAME: ptr [[B:%.*]], ptr [[A:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[A3:%.*]] = ptrtoint ptr [[A]] to i32
; CHECK-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i32
; CHECK-NEXT: [[C1:%.*]] = ptrtoint ptr [[C]] to i32
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[B_PROMOTED]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]])
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[C1]], [[B2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C1]], [[A3]]
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP3]], 8
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[_LR_PH_I]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[B_PROMOTED]], [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[OFFSET_IDX]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP6]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x i32>, ptr [[TMP9]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], [[WIDE_LOAD5]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP6]]
; CHECK-NEXT: store <2 x i32> [[TMP11]], ptr [[TMP12]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]]
; CHECK: [[_LR_PH_I]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I1:.*:]]
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ]
; CHECK-NEXT: [[INDVARS_IV:%.*]] = sext i32 [[IV]] to i64
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[BLOAD:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[ALOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[SUM:%.*]] = add nsw i32 [[BLOAD]], [[ALOAD]]
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[SUM]], ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT: [[IVNEXT]] = add nsw i32 [[IV]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[IVNEXT]], [[N]]
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[SUM]], %[[DOTLR_PH_I]] ], [ [[TMP15]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
bb:
%b.promoted = load i32, ptr @b, align 4
br label %.lr.ph.i
.lr.ph.i:
%iv = phi i32 [ %ivnext, %.lr.ph.i ], [ %b.promoted, %bb ]
%indvars.iv = sext i32 %iv to i64
%arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%Bload = load i32, ptr %arrayidx2, align 4
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%Aload = load i32, ptr %arrayidx, align 4
%sum = add nsw i32 %Bload, %Aload
%arrayidx3 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
store i32 %sum, ptr %arrayidx3, align 4
%ivnext = add nsw i32 %iv, 1
%tmp19 = icmp slt i32 %ivnext, %N
br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
f1.exit.loopexit:
%.lcssa = phi i32 [ %sum, %.lr.ph.i ]
ret i32 %.lcssa
}
@tab = common global [32 x i8] zeroinitializer, align 1
define i32 @non_uniform_live_out() {
; CHECK-LABEL: define i32 @non_uniform_live_out() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 7)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i8> [[WIDE_LOAD]], splat (i8 1)
; CHECK-NEXT: store <2 x i8> [[TMP4]], ptr [[TMP2]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 20000, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_09:%.*]] = add i32 [[I_08]], 7
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_09]]
; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[BUMP:%.*]] = add i8 [[TMP7]], 1
; CHECK-NEXT: store i8 [[BUMP]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 20000
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[I_09]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX_OUT:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[LCSSA]]
; CHECK-NEXT: store i8 42, ptr [[ARRAYIDX_OUT]], align 1
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%i.09 = add i32 %i.08, 7
%arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.09
%0 = load i8, ptr %arrayidx, align 1
%bump = add i8 %0, 1
store i8 %bump, ptr %arrayidx, align 1
%inc = add nsw i32 %i.08, 1
%exitcond = icmp eq i32 %i.08, 20000
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
%lcssa = phi i32 [%i.09, %for.body]
%arrayidx.out = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %lcssa
store i8 42, ptr %arrayidx.out, align 1
ret i32 0
}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
;.