llvm-project/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
Florian Hahn d4a8fc3a87
[VPlan] Introduce and use BranchOnCount VPInstruction.
This patch adds a new BranchOnCount VPInstruction opcode with 2
operands. It first compares its 2 operands (increment of canonical
induction and vector trip count), followed by a branch to either the
exit block or back to the vector header.

It must be the last recipe in the exit block of the topmost vector loop
region.

This extracts parts from D113224 and was discussed in D113223.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D116479
2022-01-12 13:42:13 +00:00

7001 lines
495 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s --check-prefix=UNROLL
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S | FileCheck %s --check-prefix=UNROLL-NO-VF
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s --check-prefix=SINK-AFTER
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s --check-prefix=NO-SINK-AFTER
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; void recurrence_1(int *a, int *b, int n) {
; for(int i = 0; i < n; i++)
; b[i] = a[i] + a[i - 1]
; }
;
;
;
define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) {
; CHECK-LABEL: @recurrence_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_PREHEADER:%.*]]
; CHECK: for.preheader:
; CHECK-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[A:%.*]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[A]], i64 1
; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP5]], [[B]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i32* [[SCEVGEP3]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i64 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4, !alias.scope !0
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[SCALAR_BODY:%.*]]
; CHECK: scalar.body:
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ]
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP15]] = load i32, i32* [[ARRAYIDX32]], align 4
; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[ADD35:%.*]] = add i32 [[TMP15]], [[SCALAR_RECUR]]
; CHECK-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: for.exit:
; CHECK-NEXT: ret void
;
; UNROLL-LABEL: @recurrence_1(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: br label [[FOR_PREHEADER:%.*]]
; UNROLL: for.preheader:
; UNROLL-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[A:%.*]], align 4
; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; UNROLL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL: vector.memcheck:
; UNROLL-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
; UNROLL-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; UNROLL-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP5]]
; UNROLL-NEXT: [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[A]], i64 1
; UNROLL-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
; UNROLL-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP6]]
; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP5]], [[B]]
; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ult i32* [[SCEVGEP3]], [[SCEVGEP]]
; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i64 3
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1
; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
; UNROLL-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4, !alias.scope !0
; UNROLL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i64 4
; UNROLL-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
; UNROLL-NEXT: [[WIDE_LOAD7]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4, !alias.scope !0
; UNROLL-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; UNROLL-NEXT: [[TMP15:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP12]]
; UNROLL-NEXT: [[TMP16:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[TMP13]]
; UNROLL-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4, !alias.scope !3, !noalias !0
; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 4
; UNROLL-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP19]], align 4, !alias.scope !3, !noalias !0
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD7]], i64 3
; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL: scalar.body:
; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NEXT: [[TMP21]] = load i32, i32* [[ARRAYIDX32]], align 4
; UNROLL-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NEXT: [[ADD35:%.*]] = add i32 [[TMP21]], [[SCALAR_RECUR]]
; UNROLL-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
; UNROLL-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; UNROLL: for.exit:
; UNROLL-NEXT: ret void
;
; UNROLL-NO-IC-LABEL: @recurrence_1(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; UNROLL-NO-IC-NEXT: br label [[FOR_PREHEADER:%.*]]
; UNROLL-NO-IC: for.preheader:
; UNROLL-NO-IC-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
; UNROLL-NO-IC-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL-NO-IC: vector.memcheck:
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; UNROLL-NO-IC-NEXT: [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[A]], i64 1
; UNROLL-NO-IC-NEXT: [[SCEVGEP34:%.*]] = bitcast i32* [[SCEVGEP3]] to i8*
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
; UNROLL-NO-IC-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP6]]
; UNROLL-NO-IC-NEXT: [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NO-IC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP7]], 1
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP8]], 1
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]]
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4, !alias.scope !0
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD7]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !0
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP8]]
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP17]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[TMP18]]
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* [[TMP24]], align 4, !alias.scope !3, !noalias !0
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP26]], align 4, !alias.scope !3, !noalias !0
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD7]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD7]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: scalar.body:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-IC-NEXT: [[TMP28]] = load i32, i32* [[ARRAYIDX32]], align 4
; UNROLL-NO-IC-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: [[ADD35:%.*]] = add i32 [[TMP28]], [[SCALAR_RECUR]]
; UNROLL-NO-IC-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; UNROLL-NO-IC: for.exit:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @recurrence_1(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; UNROLL-NO-VF-NEXT: br label [[FOR_PREHEADER:%.*]]
; UNROLL-NO-VF: for.preheader:
; UNROLL-NO-VF-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
; UNROLL-NO-VF-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL-NO-VF: vector.memcheck:
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
; UNROLL-NO-VF-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP5]]
; UNROLL-NO-VF-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; UNROLL-NO-VF-NEXT: [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[A]], i64 1
; UNROLL-NO-VF-NEXT: [[SCEVGEP34:%.*]] = bitcast i32* [[SCEVGEP3]] to i8*
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
; UNROLL-NO-VF-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP6]]
; UNROLL-NO-VF-NEXT: [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
; UNROLL-NO-VF-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; UNROLL-NO-VF-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; UNROLL-NO-VF-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NO-VF-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDUCTION7]], 1
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 4, !alias.scope !0
; UNROLL-NO-VF-NEXT: [[TMP12]] = load i32, i32* [[TMP10]], align 4, !alias.scope !0
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]]
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]]
; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = add i32 [[TMP11]], [[VECTOR_RECUR]]
; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = add i32 [[TMP12]], [[TMP11]]
; UNROLL-NO-VF-NEXT: store i32 [[TMP15]], i32* [[TMP13]], align 4, !alias.scope !3, !noalias !0
; UNROLL-NO-VF-NEXT: store i32 [[TMP16]], i32* [[TMP14]], align 4, !alias.scope !3, !noalias !0
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: scalar.body:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-VF-NEXT: [[TMP18]] = load i32, i32* [[ARRAYIDX32]], align 4
; UNROLL-NO-VF-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: [[ADD35:%.*]] = add i32 [[TMP18]], [[SCALAR_RECUR]]
; UNROLL-NO-VF-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; UNROLL-NO-VF: for.exit:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @recurrence_1(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; SINK-AFTER-NEXT: br label [[FOR_PREHEADER:%.*]]
; SINK-AFTER: for.preheader:
; SINK-AFTER-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
; SINK-AFTER-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; SINK-AFTER-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; SINK-AFTER: vector.memcheck:
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
; SINK-AFTER-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; SINK-AFTER-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
; SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP5]]
; SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; SINK-AFTER-NEXT: [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[A]], i64 1
; SINK-AFTER-NEXT: [[SCEVGEP34:%.*]] = bitcast i32* [[SCEVGEP3]] to i8*
; SINK-AFTER-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
; SINK-AFTER-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP6]]
; SINK-AFTER-NEXT: [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
; SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; SINK-AFTER-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 1
; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
; SINK-AFTER-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4, !alias.scope !0
; SINK-AFTER-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP7]]
; SINK-AFTER-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP12]]
; SINK-AFTER-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 0
; SINK-AFTER-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
; SINK-AFTER-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 4, !alias.scope !3, !noalias !0
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: scalar.body:
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
; SINK-AFTER-NEXT: [[TMP18]] = load i32, i32* [[ARRAYIDX32]], align 4
; SINK-AFTER-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: [[ADD35:%.*]] = add i32 [[TMP18]], [[SCALAR_RECUR]]
; SINK-AFTER-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; SINK-AFTER: for.exit:
; SINK-AFTER-NEXT: ret void
;
; NO-SINK-AFTER-LABEL: @recurrence_1(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; NO-SINK-AFTER-NEXT: br label [[FOR_PREHEADER:%.*]]
; NO-SINK-AFTER: for.preheader:
; NO-SINK-AFTER-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
; NO-SINK-AFTER-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; NO-SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; NO-SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; NO-SINK-AFTER: vector.memcheck:
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
; NO-SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP5]]
; NO-SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; NO-SINK-AFTER-NEXT: [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[A]], i64 1
; NO-SINK-AFTER-NEXT: [[SCEVGEP34:%.*]] = bitcast i32* [[SCEVGEP3]] to i8*
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
; NO-SINK-AFTER-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP6]]
; NO-SINK-AFTER-NEXT: [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
; NO-SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; NO-SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; NO-SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; NO-SINK-AFTER-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; NO-SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i32 3
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 1
; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
; NO-SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4, !alias.scope !0
; NO-SINK-AFTER-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP7]]
; NO-SINK-AFTER-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP12]]
; NO-SINK-AFTER-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
; NO-SINK-AFTER-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 4, !alias.scope !3, !noalias !0
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; NO-SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; NO-SINK-AFTER: scalar.body:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; NO-SINK-AFTER-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
; NO-SINK-AFTER-NEXT: [[TMP18]] = load i32, i32* [[ARRAYIDX32]], align 4
; NO-SINK-AFTER-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; NO-SINK-AFTER-NEXT: [[ADD35:%.*]] = add i32 [[TMP18]], [[SCALAR_RECUR]]
; NO-SINK-AFTER-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
; NO-SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; NO-SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; NO-SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; NO-SINK-AFTER: for.exit:
; NO-SINK-AFTER-NEXT: ret void
;
entry:
br label %for.preheader
for.preheader:
%arrayidx.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 0
%pre_load = load i32, i32* %arrayidx.phi.trans.insert
br label %scalar.body
scalar.body:
%0 = phi i32 [ %pre_load, %for.preheader ], [ %1, %scalar.body ]
%indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%arrayidx32 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
%1 = load i32, i32* %arrayidx32
%arrayidx34 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
%add35 = add i32 %1, %0
store i32 %add35, i32* %arrayidx34
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.exit, label %scalar.body
for.exit:
ret void
}
; int recurrence_2(int *a, int n) {
; int minmax;
; for (int i = 0; i < n; ++i)
; minmax = min(minmax, max(a[i] - a[i-1], 0));
; return minmax;
; }
;
;
;
define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
; CHECK-LABEL: @recurrence_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.preheader:
; CHECK-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i64 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP8]]
; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP9]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP8]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP10]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[SCALAR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[MINMAX_0_LCSSA]]
; CHECK: scalar.body:
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[SCALAR_BODY]] ]
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; CHECK-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP13]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP13]], [[SCALAR_RECUR]]
; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
; CHECK-NEXT: [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
;
; UNROLL-LABEL: @recurrence_2(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
; UNROLL-NEXT: br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; UNROLL: for.preheader:
; UNROLL-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
; UNROLL-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; UNROLL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i64 3
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD2:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]]
; UNROLL-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 4
; UNROLL-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; UNROLL-NEXT: [[WIDE_LOAD2]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
; UNROLL-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP9:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP7]]
; UNROLL-NEXT: [[TMP10:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP8]]
; UNROLL-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[TMP9]], zeroinitializer
; UNROLL-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[TMP10]], zeroinitializer
; UNROLL-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP9]], <4 x i32> zeroinitializer
; UNROLL-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP12]], <4 x i32> [[TMP10]], <4 x i32> zeroinitializer
; UNROLL-NEXT: [[TMP15:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP13]]
; UNROLL-NEXT: [[TMP16:%.*]] = icmp slt <4 x i32> [[VEC_PHI1]], [[TMP14]]
; UNROLL-NEXT: [[TMP17]] = select <4 x i1> [[TMP15]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP13]]
; UNROLL-NEXT: [[TMP18]] = select <4 x i1> [[TMP16]], <4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP14]]
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP17]], [[TMP18]]
; UNROLL-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP17]], <4 x i32> [[TMP18]]
; UNROLL-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX_SELECT]])
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 3
; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL: for.cond.cleanup.loopexit:
; UNROLL-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: br label [[FOR_COND_CLEANUP]]
; UNROLL: for.cond.cleanup:
; UNROLL-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; UNROLL-NEXT: ret i32 [[MINMAX_0_LCSSA]]
; UNROLL: scalar.body:
; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; UNROLL-NEXT: [[TMP21]] = load i32, i32* [[ARRAYIDX]], align 4
; UNROLL-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP21]], [[SCALAR_RECUR]]
; UNROLL-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
; UNROLL-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
; UNROLL-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
; UNROLL-NEXT: [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
;
; UNROLL-NO-IC-LABEL: @recurrence_2(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
; UNROLL-NO-IC-NEXT: br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; UNROLL-NO-IC: for.preheader:
; UNROLL-NO-IC-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD2:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP11]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP12]]
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = icmp sgt <4 x i32> [[TMP13]], zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp sgt <4 x i32> [[TMP14]], zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP13]], <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP16]], <4 x i32> [[TMP14]], <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP17]]
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = icmp slt <4 x i32> [[VEC_PHI1]], [[TMP18]]
; UNROLL-NO-IC-NEXT: [[TMP21]] = select <4 x i1> [[TMP19]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP17]]
; UNROLL-NO-IC-NEXT: [[TMP22]] = select <4 x i1> [[TMP20]], <4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP18]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP21]], [[TMP22]]
; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP21]], <4 x i32> [[TMP22]]
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX_SELECT]])
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: for.cond.cleanup.loopexit:
; UNROLL-NO-IC-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_COND_CLEANUP]]
; UNROLL-NO-IC: for.cond.cleanup:
; UNROLL-NO-IC-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[MINMAX_0_LCSSA]]
; UNROLL-NO-IC: scalar.body:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: [[TMP25]] = load i32, i32* [[ARRAYIDX]], align 4
; UNROLL-NO-IC-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP25]], [[SCALAR_RECUR]]
; UNROLL-NO-IC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
; UNROLL-NO-IC-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
; UNROLL-NO-IC-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
; UNROLL-NO-IC-NEXT: [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
;
; UNROLL-NO-VF-LABEL: @recurrence_2(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
; UNROLL-NO-VF-NEXT: br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; UNROLL-NO-VF: for.preheader:
; UNROLL-NO-VF-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDUCTION]]
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDUCTION1]]
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
; UNROLL-NO-VF-NEXT: [[TMP6]] = load i32, i32* [[TMP4]], align 4
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sub nsw i32 [[TMP5]], [[VECTOR_RECUR]]
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sub nsw i32 [[TMP6]], [[TMP5]]
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 0
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], 0
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i32 [[TMP7]], i32 0
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 0
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = icmp slt i32 [[VEC_PHI]], [[TMP11]]
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp slt i32 [[VEC_PHI2]], [[TMP12]]
; UNROLL-NO-VF-NEXT: [[TMP15]] = select i1 [[TMP13]], i32 [[VEC_PHI]], i32 [[TMP11]]
; UNROLL-NO-VF-NEXT: [[TMP16]] = select i1 [[TMP14]], i32 [[VEC_PHI2]], i32 [[TMP12]]
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP15]], [[TMP16]]
; UNROLL-NO-VF-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP15]], i32 [[TMP16]]
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: for.cond.cleanup.loopexit:
; UNROLL-NO-VF-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_COND_CLEANUP]]
; UNROLL-NO-VF: for.cond.cleanup:
; UNROLL-NO-VF-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[MINMAX_0_LCSSA]]
; UNROLL-NO-VF: scalar.body:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: [[TMP18]] = load i32, i32* [[ARRAYIDX]], align 4
; UNROLL-NO-VF-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP18]], [[SCALAR_RECUR]]
; UNROLL-NO-VF-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
; UNROLL-NO-VF-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
; UNROLL-NO-VF-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
; UNROLL-NO-VF-NEXT: [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
;
; SINK-AFTER-LABEL: @recurrence_2(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
; SINK-AFTER-NEXT: br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; SINK-AFTER: for.preheader:
; SINK-AFTER-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; SINK-AFTER-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; SINK-AFTER-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
; SINK-AFTER-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP8:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP7]]
; SINK-AFTER-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer
; SINK-AFTER-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP10]]
; SINK-AFTER-NEXT: [[TMP12]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP10]]
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP12]])
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: for.cond.cleanup.loopexit:
; SINK-AFTER-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: br label [[FOR_COND_CLEANUP]]
; SINK-AFTER: for.cond.cleanup:
; SINK-AFTER-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; SINK-AFTER-NEXT: ret i32 [[MINMAX_0_LCSSA]]
; SINK-AFTER: scalar.body:
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: [[TMP15]] = load i32, i32* [[ARRAYIDX]], align 4
; SINK-AFTER-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP15]], [[SCALAR_RECUR]]
; SINK-AFTER-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
; SINK-AFTER-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
; SINK-AFTER-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
; SINK-AFTER-NEXT: [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
;
; NO-SINK-AFTER-LABEL: @recurrence_2(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
; NO-SINK-AFTER-NEXT: br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; NO-SINK-AFTER: for.preheader:
; NO-SINK-AFTER-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
; NO-SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; NO-SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; NO-SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; NO-SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; NO-SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP7]]
; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer
; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> zeroinitializer
; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP10]]
; NO-SINK-AFTER-NEXT: [[TMP12]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP10]]
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP12]])
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
; NO-SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; NO-SINK-AFTER: for.cond.cleanup.loopexit:
; NO-SINK-AFTER-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: br label [[FOR_COND_CLEANUP]]
; NO-SINK-AFTER: for.cond.cleanup:
; NO-SINK-AFTER-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; NO-SINK-AFTER-NEXT: ret i32 [[MINMAX_0_LCSSA]]
; NO-SINK-AFTER: scalar.body:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; NO-SINK-AFTER-NEXT: [[TMP15]] = load i32, i32* [[ARRAYIDX]], align 4
; NO-SINK-AFTER-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP15]], [[SCALAR_RECUR]]
; NO-SINK-AFTER-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
; NO-SINK-AFTER-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
; NO-SINK-AFTER-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
; NO-SINK-AFTER-NEXT: [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
; NO-SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; NO-SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; NO-SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; NO-SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
;
entry:
%cmp27 = icmp sgt i32 %n, 0
br i1 %cmp27, label %for.preheader, label %for.cond.cleanup
for.preheader:
%arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 -1
%.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
br label %scalar.body
for.cond.cleanup.loopexit:
%minmax.0.cond.lcssa = phi i32 [ %minmax.0.cond, %scalar.body ]
br label %for.cond.cleanup
for.cond.cleanup:
%minmax.0.lcssa = phi i32 [ poison, %entry ], [ %minmax.0.cond.lcssa, %for.cond.cleanup.loopexit ]
ret i32 %minmax.0.lcssa
scalar.body:
%0 = phi i32 [ %.pre, %for.preheader ], [ %1, %scalar.body ]
%indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
%minmax.028 = phi i32 [ poison, %for.preheader ], [ %minmax.0.cond, %scalar.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%1 = load i32, i32* %arrayidx, align 4
%sub3 = sub nsw i32 %1, %0
%cmp4 = icmp sgt i32 %sub3, 0
%cond = select i1 %cmp4, i32 %sub3, i32 0
%cmp5 = icmp slt i32 %minmax.028, %cond
%minmax.0.cond = select i1 %cmp5, i32 %minmax.028, i32 %cond
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %scalar.body
}
; void recurrence_3(short *a, double *b, int n, float f, short p) {
; b[0] = (double)a[0] - f * (double)p;
; for (int i = 1; i < n; i++)
; b[i] = (double)a[i] - f * (double)a[i - 1];
; }
;
; Check also that the casts were not moved needlessly.
;
;
define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 %n, float %f, i16 %p) {
; CHECK-LABEL: @recurrence_3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; CHECK-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
; CHECK-NEXT: [[CONV1:%.*]] = fpext float [[F:%.*]] to double
; CHECK-NEXT: [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
; CHECK-NEXT: [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
; CHECK-NEXT: store double [[SUB]], double* [[B:%.*]], align 8
; CHECK-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
; CHECK-NEXT: br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.preheader:
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[N]], -2
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 3
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[N]], -2
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]]
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1
; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[SCEVGEP6]] to double*
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult double* [[SCEVGEP]], [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[SCEVGEP2]] to i16*
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP4]], [[TMP8]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934588
; CHECK-NEXT: [[IND_END:%.*]] = or i64 [[N_VEC]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !11
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
; CHECK-NEXT: [[TMP13:%.*]] = sitofp <4 x i16> [[TMP11]] to <4 x double>
; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = fsub fast <4 x double> [[TMP12]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>*
; CHECK-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP17]], align 8, !alias.scope !14, !noalias !11
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[VECTOR_MEMCHECK]] ], [ 1, [[FOR_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[SCALAR_BODY:%.*]]
; CHECK: scalar.body:
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[SCALAR_BODY]] ]
; CHECK-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
; CHECK-NEXT: [[TMP19]] = load i16, i16* [[ARRAYIDX5]], align 2
; CHECK-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP19]] to double
; CHECK-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
; CHECK-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
; CHECK-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
; CHECK-NEXT: store double [[SUB13]], double* [[ARRAYIDX15]], align 8
; CHECK-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; UNROLL-LABEL: @recurrence_3(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
; UNROLL-NEXT: [[CONV1:%.*]] = fpext float [[F:%.*]] to double
; UNROLL-NEXT: [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
; UNROLL-NEXT: [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
; UNROLL-NEXT: [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
; UNROLL-NEXT: store double [[SUB]], double* [[B:%.*]], align 8
; UNROLL-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
; UNROLL-NEXT: br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
; UNROLL: for.preheader:
; UNROLL-NEXT: [[TMP1:%.*]] = add i32 [[N]], -2
; UNROLL-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; UNROLL-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 7
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL: vector.memcheck:
; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1
; UNROLL-NEXT: [[TMP4:%.*]] = add i32 [[N]], -2
; UNROLL-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; UNROLL-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2
; UNROLL-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]]
; UNROLL-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1
; UNROLL-NEXT: [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]]
; UNROLL-NEXT: [[TMP7:%.*]] = bitcast i16* [[SCEVGEP6]] to double*
; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ult double* [[SCEVGEP]], [[TMP7]]
; UNROLL-NEXT: [[TMP8:%.*]] = bitcast double* [[SCEVGEP2]] to i16*
; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP4]], [[TMP8]]
; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934584
; UNROLL-NEXT: [[IND_END:%.*]] = or i64 [[N_VEC]], 1
; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 3
; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; UNROLL-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
; UNROLL-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT9]], <4 x double> poison, <4 x i32> zeroinitializer
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD8:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1
; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]]
; UNROLL-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !11
; UNROLL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i64 4
; UNROLL-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>*
; UNROLL-NEXT: [[WIDE_LOAD8]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2, !alias.scope !11
; UNROLL-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP15:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
; UNROLL-NEXT: [[TMP16:%.*]] = sitofp <4 x i16> [[WIDE_LOAD8]] to <4 x double>
; UNROLL-NEXT: [[TMP17:%.*]] = sitofp <4 x i16> [[TMP13]] to <4 x double>
; UNROLL-NEXT: [[TMP18:%.*]] = sitofp <4 x i16> [[TMP14]] to <4 x double>
; UNROLL-NEXT: [[TMP19:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP17]]
; UNROLL-NEXT: [[TMP20:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT10]], [[TMP18]]
; UNROLL-NEXT: [[TMP21:%.*]] = fsub fast <4 x double> [[TMP15]], [[TMP19]]
; UNROLL-NEXT: [[TMP22:%.*]] = fsub fast <4 x double> [[TMP16]], [[TMP20]]
; UNROLL-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]]
; UNROLL-NEXT: [[TMP24:%.*]] = bitcast double* [[TMP23]] to <4 x double>*
; UNROLL-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[TMP24]], align 8, !alias.scope !14, !noalias !11
; UNROLL-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 4
; UNROLL-NEXT: [[TMP26:%.*]] = bitcast double* [[TMP25]] to <4 x double>*
; UNROLL-NEXT: store <4 x double> [[TMP22]], <4 x double>* [[TMP26]], align 8, !alias.scope !14, !noalias !11
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD8]], i64 3
; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[VECTOR_MEMCHECK]] ], [ 1, [[FOR_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL: scalar.body:
; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
; UNROLL-NEXT: [[TMP28]] = load i16, i16* [[ARRAYIDX5]], align 2
; UNROLL-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP28]] to double
; UNROLL-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
; UNROLL-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
; UNROLL-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
; UNROLL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
; UNROLL-NEXT: store double [[SUB13]], double* [[ARRAYIDX15]], align 8
; UNROLL-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
; UNROLL-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; UNROLL: for.end.loopexit:
; UNROLL-NEXT: br label [[FOR_END]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
; UNROLL-NO-IC-LABEL: @recurrence_3(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
; UNROLL-NO-IC-NEXT: [[CONV1:%.*]] = fpext float [[F:%.*]] to double
; UNROLL-NO-IC-NEXT: [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
; UNROLL-NO-IC-NEXT: [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
; UNROLL-NO-IC-NEXT: store double [[SUB]], double* [[B:%.*]], align 8
; UNROLL-NO-IC-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
; UNROLL-NO-IC-NEXT: br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
; UNROLL-NO-IC: for.preheader:
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[N]], -2
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL-NO-IC: vector.memcheck:
; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1
; UNROLL-NO-IC-NEXT: [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to i8*
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[N]], -2
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2
; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]]
; UNROLL-NO-IC-NEXT: [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to i8*
; UNROLL-NO-IC-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1
; UNROLL-NO-IC-NEXT: [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to i8*
; UNROLL-NO-IC-NEXT: [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]]
; UNROLL-NO-IC-NEXT: [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]]
; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]]
; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NO-IC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i64 1, [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT9]], <4 x double> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD8:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP8]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>*
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2, !alias.scope !11
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = bitcast i16* [[TMP13]] to <4 x i16>*
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD8]] = load <4 x i16>, <4 x i16>* [[TMP14]], align 2, !alias.scope !11
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = sitofp <4 x i16> [[WIDE_LOAD8]] to <4 x double>
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = sitofp <4 x i16> [[TMP15]] to <4 x double>
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = sitofp <4 x i16> [[TMP16]] to <4 x double>
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = fmul fast <4 x double> [[TMP19]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = fmul fast <4 x double> [[TMP20]], [[BROADCAST_SPLAT10]]
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = fsub fast <4 x double> [[TMP17]], [[TMP21]]
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = fsub fast <4 x double> [[TMP18]], [[TMP22]]
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP8]]
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = bitcast double* [[TMP27]] to <4 x double>*
; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP23]], <4 x double>* [[TMP28]], align 8, !alias.scope !14, !noalias !11
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>*
; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[TMP30]], align 8, !alias.scope !14, !noalias !11
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD8]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD8]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ], [ 1, [[VECTOR_MEMCHECK]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: scalar.body:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP32:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
; UNROLL-NO-IC-NEXT: [[TMP32]] = load i16, i16* [[ARRAYIDX5]], align 2
; UNROLL-NO-IC-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP32]] to double
; UNROLL-NO-IC-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
; UNROLL-NO-IC-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
; UNROLL-NO-IC-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
; UNROLL-NO-IC-NEXT: store double [[SUB13]], double* [[ARRAYIDX15]], align 8
; UNROLL-NO-IC-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; UNROLL-NO-IC: for.end.loopexit:
; UNROLL-NO-IC-NEXT: br label [[FOR_END]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @recurrence_3(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
; UNROLL-NO-VF-NEXT: [[CONV1:%.*]] = fpext float [[F:%.*]] to double
; UNROLL-NO-VF-NEXT: [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
; UNROLL-NO-VF-NEXT: [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
; UNROLL-NO-VF-NEXT: store double [[SUB]], double* [[B:%.*]], align 8
; UNROLL-NO-VF-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
; UNROLL-NO-VF-NEXT: br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
; UNROLL-NO-VF: for.preheader:
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i32 [[N]], -2
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL-NO-VF: vector.memcheck:
; UNROLL-NO-VF-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1
; UNROLL-NO-VF-NEXT: [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to i8*
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i32 [[N]], -2
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2
; UNROLL-NO-VF-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]]
; UNROLL-NO-VF-NEXT: [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to i8*
; UNROLL-NO-VF-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1
; UNROLL-NO-VF-NEXT: [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to i8*
; UNROLL-NO-VF-NEXT: [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]]
; UNROLL-NO-VF-NEXT: [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
; UNROLL-NO-VF-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]]
; UNROLL-NO-VF-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]]
; UNROLL-NO-VF-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NO-VF-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = add i64 1, [[N_VEC]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION8:%.*]] = add i64 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION]]
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION8]]
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]], align 2, !alias.scope !10
; UNROLL-NO-VF-NEXT: [[TMP10]] = load i16, i16* [[TMP8]], align 2, !alias.scope !10
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sitofp i16 [[TMP9]] to double
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sitofp i16 [[TMP10]] to double
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = sitofp i16 [[TMP9]] to double
; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = fmul fast double [[TMP13]], [[CONV1]]
; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = fmul fast double [[TMP14]], [[CONV1]]
; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = fsub fast double [[TMP11]], [[TMP15]]
; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = fsub fast double [[TMP12]], [[TMP16]]
; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION]]
; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION8]]
; UNROLL-NO-VF-NEXT: store double [[TMP17]], double* [[TMP19]], align 8, !alias.scope !13, !noalias !10
; UNROLL-NO-VF-NEXT: store double [[TMP18]], double* [[TMP20]], align 8, !alias.scope !13, !noalias !10
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ], [ 1, [[VECTOR_MEMCHECK]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: scalar.body:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
; UNROLL-NO-VF-NEXT: [[TMP22]] = load i16, i16* [[ARRAYIDX5]], align 2
; UNROLL-NO-VF-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP22]] to double
; UNROLL-NO-VF-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
; UNROLL-NO-VF-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
; UNROLL-NO-VF-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
; UNROLL-NO-VF-NEXT: store double [[SUB13]], double* [[ARRAYIDX15]], align 8
; UNROLL-NO-VF-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; UNROLL-NO-VF: for.end.loopexit:
; UNROLL-NO-VF-NEXT: br label [[FOR_END]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @recurrence_3(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; SINK-AFTER-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
; SINK-AFTER-NEXT: [[CONV1:%.*]] = fpext float [[F:%.*]] to double
; SINK-AFTER-NEXT: [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
; SINK-AFTER-NEXT: [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
; SINK-AFTER-NEXT: [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
; SINK-AFTER-NEXT: store double [[SUB]], double* [[B:%.*]], align 8
; SINK-AFTER-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
; SINK-AFTER-NEXT: br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
; SINK-AFTER: for.preheader:
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i32 [[N]], -2
; SINK-AFTER-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; SINK-AFTER: vector.memcheck:
; SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1
; SINK-AFTER-NEXT: [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to i8*
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[N]], -2
; SINK-AFTER-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; SINK-AFTER-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2
; SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]]
; SINK-AFTER-NEXT: [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to i8*
; SINK-AFTER-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1
; SINK-AFTER-NEXT: [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to i8*
; SINK-AFTER-NEXT: [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]]
; SINK-AFTER-NEXT: [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
; SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]]
; SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]]
; SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; SINK-AFTER-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[IND_END:%.*]] = add i64 1, [[N_VEC]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; SINK-AFTER-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP7]]
; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
; SINK-AFTER-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !11
; SINK-AFTER-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
; SINK-AFTER-NEXT: [[TMP13:%.*]] = sitofp <4 x i16> [[TMP11]] to <4 x double>
; SINK-AFTER-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[TMP13]], [[BROADCAST_SPLAT]]
; SINK-AFTER-NEXT: [[TMP15:%.*]] = fsub fast <4 x double> [[TMP12]], [[TMP14]]
; SINK-AFTER-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP7]]
; SINK-AFTER-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 0
; SINK-AFTER-NEXT: [[TMP18:%.*]] = bitcast double* [[TMP17]] to <4 x double>*
; SINK-AFTER-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP18]], align 8, !alias.scope !14, !noalias !11
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ], [ 1, [[VECTOR_MEMCHECK]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: scalar.body:
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
; SINK-AFTER-NEXT: [[TMP20]] = load i16, i16* [[ARRAYIDX5]], align 2
; SINK-AFTER-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP20]] to double
; SINK-AFTER-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
; SINK-AFTER-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
; SINK-AFTER-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
; SINK-AFTER-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
; SINK-AFTER-NEXT: store double [[SUB13]], double* [[ARRAYIDX15]], align 8
; SINK-AFTER-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; SINK-AFTER: for.end.loopexit:
; SINK-AFTER-NEXT: br label [[FOR_END]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
; NO-SINK-AFTER-LABEL: @recurrence_3(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; NO-SINK-AFTER-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
; NO-SINK-AFTER-NEXT: [[CONV1:%.*]] = fpext float [[F:%.*]] to double
; NO-SINK-AFTER-NEXT: [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
; NO-SINK-AFTER-NEXT: [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
; NO-SINK-AFTER-NEXT: [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
; NO-SINK-AFTER-NEXT: store double [[SUB]], double* [[B:%.*]], align 8
; NO-SINK-AFTER-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
; NO-SINK-AFTER-NEXT: br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
; NO-SINK-AFTER: for.preheader:
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = add i32 [[N]], -2
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; NO-SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; NO-SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; NO-SINK-AFTER: vector.memcheck:
; NO-SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1
; NO-SINK-AFTER-NEXT: [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to i8*
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[N]], -2
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2
; NO-SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]]
; NO-SINK-AFTER-NEXT: [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to i8*
; NO-SINK-AFTER-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1
; NO-SINK-AFTER-NEXT: [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to i8*
; NO-SINK-AFTER-NEXT: [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]]
; NO-SINK-AFTER-NEXT: [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
; NO-SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]]
; NO-SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]]
; NO-SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; NO-SINK-AFTER-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; NO-SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; NO-SINK-AFTER-NEXT: [[IND_END:%.*]] = add i64 1, [[N_VEC]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
; NO-SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
; NO-SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP7]]
; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
; NO-SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !11
; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = sitofp <4 x i16> [[TMP11]] to <4 x double>
; NO-SINK-AFTER-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[TMP13]], [[BROADCAST_SPLAT]]
; NO-SINK-AFTER-NEXT: [[TMP15:%.*]] = fsub fast <4 x double> [[TMP12]], [[TMP14]]
; NO-SINK-AFTER-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP7]]
; NO-SINK-AFTER-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP18:%.*]] = bitcast double* [[TMP17]] to <4 x double>*
; NO-SINK-AFTER-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP18]], align 8, !alias.scope !14, !noalias !11
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ], [ 1, [[VECTOR_MEMCHECK]] ]
; NO-SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; NO-SINK-AFTER: scalar.body:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; NO-SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
; NO-SINK-AFTER-NEXT: [[TMP20]] = load i16, i16* [[ARRAYIDX5]], align 2
; NO-SINK-AFTER-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP20]] to double
; NO-SINK-AFTER-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
; NO-SINK-AFTER-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
; NO-SINK-AFTER-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
; NO-SINK-AFTER-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
; NO-SINK-AFTER-NEXT: store double [[SUB13]], double* [[ARRAYIDX15]], align 8
; NO-SINK-AFTER-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
; NO-SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
; NO-SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; NO-SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; NO-SINK-AFTER: for.end.loopexit:
; NO-SINK-AFTER-NEXT: br label [[FOR_END]]
; NO-SINK-AFTER: for.end:
; NO-SINK-AFTER-NEXT: ret void
;
entry:
%0 = load i16, i16* %a, align 2
%conv = sitofp i16 %0 to double
%conv1 = fpext float %f to double
%conv2 = sitofp i16 %p to double
%mul = fmul fast double %conv2, %conv1
%sub = fsub fast double %conv, %mul
store double %sub, double* %b, align 8
%cmp25 = icmp sgt i32 %n, 1
br i1 %cmp25, label %for.preheader, label %for.end
for.preheader:
br label %scalar.body
scalar.body:
%1 = phi i16 [ %0, %for.preheader ], [ %2, %scalar.body ]
%advars.iv = phi i64 [ %advars.iv.next, %scalar.body ], [ 1, %for.preheader ]
%arrayidx5 = getelementptr inbounds i16, i16* %a, i64 %advars.iv
%2 = load i16, i16* %arrayidx5, align 2
%conv6 = sitofp i16 %2 to double
%conv11 = sitofp i16 %1 to double
%mul12 = fmul fast double %conv11, %conv1
%sub13 = fsub fast double %conv6, %mul12
%arrayidx15 = getelementptr inbounds double, double* %b, i64 %advars.iv
store double %sub13, double* %arrayidx15, align 8
%advars.iv.next = add nuw nsw i64 %advars.iv, 1
%lftr.wideiv = trunc i64 %advars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end.loopexit, label %scalar.body
for.end.loopexit:
br label %for.end
for.end:
ret void
}
; void PR26734(short *a, int *b, int *c, int d, short *e) {
; for (; d != 21; d++) {
; *b &= *c;
; *e = *a - 6;
; *c = *e;
; }
; }
;
;
define void @PR26734(i16* %a, i32* %b, i32* %c, i32 %d, i16* %e) {
; CHECK-LABEL: @PR26734(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
; CHECK-NEXT: br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
; CHECK: entry.for.end_crit_edge:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: for.body.lr.ph:
; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; CHECK-NEXT: [[SUB:%.*]] = add i16 [[TMP0]], -6
; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[SUB]] to i32
; CHECK-NEXT: [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, i32* [[B:%.*]], align 4
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
; CHECK-NEXT: [[AND]] = and i32 [[AND6]], [[CONV25]]
; CHECK-NEXT: [[INC]] = add nsw i32 [[INC7]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 21
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; CHECK: for.cond.for.end_crit_edge:
; CHECK-NEXT: store i32 [[CONV2]], i32* [[C]], align 4
; CHECK-NEXT: store i32 [[AND]], i32* [[B]], align 4
; CHECK-NEXT: store i16 [[SUB]], i16* [[E:%.*]], align 2
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; UNROLL-LABEL: @PR26734(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
; UNROLL-NEXT: br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
; UNROLL: entry.for.end_crit_edge:
; UNROLL-NEXT: br label [[FOR_END:%.*]]
; UNROLL: for.body.lr.ph:
; UNROLL-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NEXT: [[SUB:%.*]] = add i16 [[TMP0]], -6
; UNROLL-NEXT: [[CONV2:%.*]] = sext i16 [[SUB]] to i32
; UNROLL-NEXT: [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
; UNROLL-NEXT: [[B_PROMOTED:%.*]] = load i32, i32* [[B:%.*]], align 4
; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL: for.body:
; UNROLL-NEXT: [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; UNROLL-NEXT: [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
; UNROLL-NEXT: [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
; UNROLL-NEXT: [[AND]] = and i32 [[AND6]], [[CONV25]]
; UNROLL-NEXT: [[INC]] = add nsw i32 [[INC7]], 1
; UNROLL-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 21
; UNROLL-NEXT: br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; UNROLL: for.cond.for.end_crit_edge:
; UNROLL-NEXT: store i32 [[CONV2]], i32* [[C]], align 4
; UNROLL-NEXT: store i32 [[AND]], i32* [[B]], align 4
; UNROLL-NEXT: store i16 [[SUB]], i16* [[E:%.*]], align 2
; UNROLL-NEXT: br label [[FOR_END]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
; UNROLL-NO-IC-LABEL: @PR26734(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
; UNROLL-NO-IC-NEXT: br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
; UNROLL-NO-IC: entry.for.end_crit_edge:
; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
; UNROLL-NO-IC-NEXT: br label [[FOR_END:%.*]]
; UNROLL-NO-IC: for.body.lr.ph:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NO-IC-NEXT: [[SUB:%.*]] = add i16 [[TMP0]], -6
; UNROLL-NO-IC-NEXT: [[CONV2:%.*]] = sext i16 [[SUB]] to i32
; UNROLL-NO-IC-NEXT: [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
; UNROLL-NO-IC-NEXT: [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[AND]] = and i32 [[AND6]], [[CONV25]]
; UNROLL-NO-IC-NEXT: [[INC]] = add nsw i32 [[INC7]], 1
; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 21
; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; UNROLL-NO-IC: for.cond.for.end_crit_edge:
; UNROLL-NO-IC-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: store i32 [[CONV2]], i32* [[C]], align 4
; UNROLL-NO-IC-NEXT: store i32 [[AND_LCSSA]], i32* [[B]], align 4
; UNROLL-NO-IC-NEXT: store i16 [[SUB]], i16* [[E:%.*]], align 2
; UNROLL-NO-IC-NEXT: br label [[FOR_END]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @PR26734(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
; UNROLL-NO-VF-NEXT: br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
; UNROLL-NO-VF: entry.for.end_crit_edge:
; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
; UNROLL-NO-VF-NEXT: br label [[FOR_END:%.*]]
; UNROLL-NO-VF: for.body.lr.ph:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NO-VF-NEXT: [[SUB:%.*]] = add i16 [[TMP0]], -6
; UNROLL-NO-VF-NEXT: [[CONV2:%.*]] = sext i16 [[SUB]] to i32
; UNROLL-NO-VF-NEXT: [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
; UNROLL-NO-VF-NEXT: [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[AND]] = and i32 [[AND6]], [[CONV25]]
; UNROLL-NO-VF-NEXT: [[INC]] = add nsw i32 [[INC7]], 1
; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 21
; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; UNROLL-NO-VF: for.cond.for.end_crit_edge:
; UNROLL-NO-VF-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: store i32 [[CONV2]], i32* [[C]], align 4
; UNROLL-NO-VF-NEXT: store i32 [[AND_LCSSA]], i32* [[B]], align 4
; UNROLL-NO-VF-NEXT: store i16 [[SUB]], i16* [[E:%.*]], align 2
; UNROLL-NO-VF-NEXT: br label [[FOR_END]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @PR26734(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
; SINK-AFTER-NEXT: br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
; SINK-AFTER: entry.for.end_crit_edge:
; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
; SINK-AFTER-NEXT: br label [[FOR_END:%.*]]
; SINK-AFTER: for.body.lr.ph:
; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; SINK-AFTER-NEXT: [[SUB:%.*]] = add i16 [[TMP0]], -6
; SINK-AFTER-NEXT: [[CONV2:%.*]] = sext i16 [[SUB]] to i32
; SINK-AFTER-NEXT: [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
; SINK-AFTER-NEXT: [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[AND]] = and i32 [[AND6]], [[CONV25]]
; SINK-AFTER-NEXT: [[INC]] = add nsw i32 [[INC7]], 1
; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 21
; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; SINK-AFTER: for.cond.for.end_crit_edge:
; SINK-AFTER-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: store i32 [[CONV2]], i32* [[C]], align 4
; SINK-AFTER-NEXT: store i32 [[AND_LCSSA]], i32* [[B]], align 4
; SINK-AFTER-NEXT: store i16 [[SUB]], i16* [[E:%.*]], align 2
; SINK-AFTER-NEXT: br label [[FOR_END]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
; NO-SINK-AFTER-LABEL: @PR26734(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
; NO-SINK-AFTER-NEXT: br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
; NO-SINK-AFTER: entry.for.end_crit_edge:
; NO-SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
; NO-SINK-AFTER-NEXT: br label [[FOR_END:%.*]]
; NO-SINK-AFTER: for.body.lr.ph:
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
; NO-SINK-AFTER-NEXT: [[SUB:%.*]] = add i16 [[TMP0]], -6
; NO-SINK-AFTER-NEXT: [[CONV2:%.*]] = sext i16 [[SUB]] to i32
; NO-SINK-AFTER-NEXT: [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
; NO-SINK-AFTER-NEXT: [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
; NO-SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; NO-SINK-AFTER: for.body:
; NO-SINK-AFTER-NEXT: [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[AND]] = and i32 [[AND6]], [[CONV25]]
; NO-SINK-AFTER-NEXT: [[INC]] = add nsw i32 [[INC7]], 1
; NO-SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 21
; NO-SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; NO-SINK-AFTER: for.cond.for.end_crit_edge:
; NO-SINK-AFTER-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: store i32 [[CONV2]], i32* [[C]], align 4
; NO-SINK-AFTER-NEXT: store i32 [[AND_LCSSA]], i32* [[B]], align 4
; NO-SINK-AFTER-NEXT: store i16 [[SUB]], i16* [[E:%.*]], align 2
; NO-SINK-AFTER-NEXT: br label [[FOR_END]]
; NO-SINK-AFTER: for.end:
; NO-SINK-AFTER-NEXT: ret void
;
entry:
%cmp4 = icmp eq i32 %d, 21
br i1 %cmp4, label %entry.for.end_crit_edge, label %for.body.lr.ph
entry.for.end_crit_edge:
%.pre = load i32, i32* %b, align 4
br label %for.end
for.body.lr.ph:
%0 = load i16, i16* %a, align 2
%sub = add i16 %0, -6
%conv2 = sext i16 %sub to i32
%c.promoted = load i32, i32* %c, align 4
%b.promoted = load i32, i32* %b, align 4
br label %for.body
for.body:
%inc7 = phi i32 [ %d, %for.body.lr.ph ], [ %inc, %for.body ]
%and6 = phi i32 [ %b.promoted, %for.body.lr.ph ], [ %and, %for.body ]
%conv25 = phi i32 [ %c.promoted, %for.body.lr.ph ], [ %conv2, %for.body ]
%and = and i32 %and6, %conv25
%inc = add nsw i32 %inc7, 1
%cmp = icmp eq i32 %inc, 21
br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body
for.cond.for.end_crit_edge:
%and.lcssa = phi i32 [ %and, %for.body ]
store i32 %conv2, i32* %c, align 4
store i32 %and.lcssa, i32* %b, align 4
store i16 %sub, i16* %e, align 2
br label %for.end
for.end:
ret void
}
; int PR27246() {
; unsigned int e, n;
; for (int i = 1; i < 49; ++i) {
; for (int k = i; k > 1; --k)
; e = k;
; n = e;
; }
; return n;
; }
;
;
define i32 @PR27246() {
; CHECK-LABEL: @PR27246(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; CHECK: for.cond1.preheader:
; CHECK-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; CHECK-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1:%.*]], [[FOR_COND_CLEANUP3]] ]
; CHECK-NEXT: br label [[FOR_COND1:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret i32 [[E_1]]
; CHECK: for.cond1:
; CHECK-NEXT: [[E_1]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
; CHECK-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; CHECK-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
; CHECK: for.cond.cleanup3:
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
;
; UNROLL-LABEL: @PR27246(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; UNROLL: for.cond1.preheader:
; UNROLL-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; UNROLL-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1:%.*]], [[FOR_COND_CLEANUP3]] ]
; UNROLL-NEXT: br label [[FOR_COND1:%.*]]
; UNROLL: for.cond.cleanup:
; UNROLL-NEXT: ret i32 [[E_1]]
; UNROLL: for.cond1:
; UNROLL-NEXT: [[E_1]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; UNROLL-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
; UNROLL-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
; UNROLL: for.cond.cleanup3:
; UNROLL-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
;
; UNROLL-NO-IC-LABEL: @PR27246(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; UNROLL-NO-IC: for.cond1.preheader:
; UNROLL-NO-IC-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; UNROLL-NO-IC-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_COND1:%.*]]
; UNROLL-NO-IC: for.cond.cleanup:
; UNROLL-NO-IC-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[E_1_LCSSA_LCSSA]]
; UNROLL-NO-IC: for.cond1:
; UNROLL-NO-IC-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; UNROLL-NO-IC-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
; UNROLL-NO-IC-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
; UNROLL-NO-IC: for.cond.cleanup3:
; UNROLL-NO-IC-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
; UNROLL-NO-IC-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
;
; UNROLL-NO-VF-LABEL: @PR27246(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; UNROLL-NO-VF: for.cond1.preheader:
; UNROLL-NO-VF-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; UNROLL-NO-VF-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_COND1:%.*]]
; UNROLL-NO-VF: for.cond.cleanup:
; UNROLL-NO-VF-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[E_1_LCSSA_LCSSA]]
; UNROLL-NO-VF: for.cond1:
; UNROLL-NO-VF-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; UNROLL-NO-VF-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
; UNROLL-NO-VF-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
; UNROLL-NO-VF: for.cond.cleanup3:
; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
; UNROLL-NO-VF-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
;
; SINK-AFTER-LABEL: @PR27246(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; SINK-AFTER: for.cond1.preheader:
; SINK-AFTER-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; SINK-AFTER-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
; SINK-AFTER-NEXT: br label [[FOR_COND1:%.*]]
; SINK-AFTER: for.cond.cleanup:
; SINK-AFTER-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
; SINK-AFTER-NEXT: ret i32 [[E_1_LCSSA_LCSSA]]
; SINK-AFTER: for.cond1:
; SINK-AFTER-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
; SINK-AFTER-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; SINK-AFTER-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; SINK-AFTER-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
; SINK-AFTER-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
; SINK-AFTER: for.cond.cleanup3:
; SINK-AFTER-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
; SINK-AFTER-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
;
; NO-SINK-AFTER-LABEL: @PR27246(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; NO-SINK-AFTER: for.cond1.preheader:
; NO-SINK-AFTER-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; NO-SINK-AFTER-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
; NO-SINK-AFTER-NEXT: br label [[FOR_COND1:%.*]]
; NO-SINK-AFTER: for.cond.cleanup:
; NO-SINK-AFTER-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
; NO-SINK-AFTER-NEXT: ret i32 [[E_1_LCSSA_LCSSA]]
; NO-SINK-AFTER: for.cond1:
; NO-SINK-AFTER-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
; NO-SINK-AFTER-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; NO-SINK-AFTER-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; NO-SINK-AFTER-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
; NO-SINK-AFTER-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
; NO-SINK-AFTER: for.cond.cleanup3:
; NO-SINK-AFTER-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
; NO-SINK-AFTER-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; NO-SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; NO-SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
;
entry:
br label %for.cond1.preheader
for.cond1.preheader:
%i.016 = phi i32 [ 1, %entry ], [ %inc, %for.cond.cleanup3 ]
%e.015 = phi i32 [ poison, %entry ], [ %e.1.lcssa, %for.cond.cleanup3 ]
br label %for.cond1
for.cond.cleanup:
%e.1.lcssa.lcssa = phi i32 [ %e.1.lcssa, %for.cond.cleanup3 ]
ret i32 %e.1.lcssa.lcssa
for.cond1:
%e.1 = phi i32 [ %k.0, %for.cond1 ], [ %e.015, %for.cond1.preheader ]
%k.0 = phi i32 [ %dec, %for.cond1 ], [ %i.016, %for.cond1.preheader ]
%cmp2 = icmp sgt i32 %k.0, 1
%dec = add nsw i32 %k.0, -1
br i1 %cmp2, label %for.cond1, label %for.cond.cleanup3
for.cond.cleanup3:
%e.1.lcssa = phi i32 [ %e.1, %for.cond1 ]
%inc = add nuw nsw i32 %i.016, 1
%exitcond = icmp eq i32 %inc, 49
br i1 %exitcond, label %for.cond.cleanup, label %for.cond1.preheader
}
;
define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) {
; CHECK-LABEL: @PR30183(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -2
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[SCALAR_BODY:%.*]]
; CHECK: scalar.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; UNROLL-LABEL: @PR30183(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -2
; UNROLL-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; UNROLL-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 14
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8
; UNROLL-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL: scalar.body:
; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; UNROLL-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; UNROLL-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
; UNROLL-NO-IC-LABEL: @PR30183(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -2
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 8
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 10
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 12
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 14
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP3]], 2
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[TMP4]], 2
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP5]], 2
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[TMP6]], 2
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[TMP7]], 2
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add nuw nsw i64 [[TMP8]], 2
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add nuw nsw i64 [[TMP9]], 2
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = add nuw nsw i64 [[TMP10]], 2
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP11]]
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP12]]
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]]
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]]
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP19]], align 4
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP20]], align 4
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP21]], align 4
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP22]], align 4
; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP27]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP23]], align 4
; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP24]], align 4
; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP25]], align 4
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP26]], align 4
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = insertelement <4 x i32> poison, i32 [[TMP35]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP36]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP37]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP42]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP38]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP34]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP34]], <4 x i32> [[TMP42]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP42]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP42]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: scalar.body:
; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
; UNROLL-NO-IC-NEXT: [[VAR2]] = load i32, i32* [[VAR1]], align 4
; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @PR30183(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -2
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD:%.*]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 2
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDUCTION]], 2
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDUCTION1]], 2
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
; UNROLL-NO-VF-NEXT: [[TMP8]] = load i32, i32* [[TMP6]], align 4
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: scalar.body:
; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; UNROLL-NO-VF-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
; UNROLL-NO-VF-NEXT: [[VAR2]] = load i32, i32* [[VAR1]], align 4
; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @PR30183(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -2
; SINK-AFTER-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
; SINK-AFTER-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2
; SINK-AFTER-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2
; SINK-AFTER-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
; SINK-AFTER-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2
; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP7]]
; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]]
; SINK-AFTER-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]]
; SINK-AFTER-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4
; SINK-AFTER-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP12]], align 4
; SINK-AFTER-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP13]], align 4
; SINK-AFTER-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP14]], align 4
; SINK-AFTER-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0
; SINK-AFTER-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP16]], i32 1
; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP17]], i32 2
; SINK-AFTER-NEXT: [[TMP22]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP18]], i32 3
; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: scalar.body:
; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; SINK-AFTER-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
; SINK-AFTER-NEXT: [[VAR2]] = load i32, i32* [[VAR1]], align 4
; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
; NO-SINK-AFTER-LABEL: @PR30183(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -2
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; NO-SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; NO-SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; NO-SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; NO-SINK-AFTER-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2
; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2
; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2
; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP7]]
; NO-SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]]
; NO-SINK-AFTER-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]]
; NO-SINK-AFTER-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4
; NO-SINK-AFTER-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP12]], align 4
; NO-SINK-AFTER-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP13]], align 4
; NO-SINK-AFTER-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP14]], align 4
; NO-SINK-AFTER-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP16]], i32 1
; NO-SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP17]], i32 2
; NO-SINK-AFTER-NEXT: [[TMP22]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP18]], i32 3
; NO-SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; NO-SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; NO-SINK-AFTER: scalar.body:
; NO-SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; NO-SINK-AFTER-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
; NO-SINK-AFTER-NEXT: [[VAR2]] = load i32, i32* [[VAR1]], align 4
; NO-SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; NO-SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; NO-SINK-AFTER: for.end:
; NO-SINK-AFTER-NEXT: ret void
;
entry:
br label %scalar.body
scalar.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %scalar.body ]
%var0 = phi i32 [ %pre_load, %entry ], [ %var2, %scalar.body ]
%i.next = add nuw nsw i64 %i, 2
%var1 = getelementptr inbounds i32, i32* %a, i64 %i.next
%var2 = load i32, i32* %var1
%cond = icmp eq i64 %i.next,%n
br i1 %cond, label %for.end, label %scalar.body
for.end:
ret void
}
;
define void @constant_folded_previous_value() {
; CHECK-LABEL: @constant_folded_previous_value(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 undef, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[SCALAR_BODY:%.*]]
; CHECK: scalar.body:
; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; UNROLL-LABEL: @constant_folded_previous_value(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: br i1 undef, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL: scalar.body:
; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
; UNROLL-NO-IC-LABEL: @constant_folded_previous_value(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ <i64 1, i64 1, i64 1, i64 1>, [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 1, [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: scalar.body:
; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VAR3]] = add i64 0, 1
; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef
; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @constant_folded_previous_value(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 0, 1
; UNROLL-NO-VF-NEXT: [[TMP1]] = add i64 0, 1
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: scalar.body:
; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VAR3]] = add i64 0, 1
; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef
; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @constant_folded_previous_value(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ <i64 1, i64 1, i64 1, i64 1>, [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 1, [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: scalar.body:
; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[VAR3]] = add i64 0, 1
; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef
; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
; NO-SINK-AFTER-LABEL: @constant_folded_previous_value(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ <i64 1, i64 1, i64 1, i64 1>, [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
; NO-SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 1, [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; NO-SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; NO-SINK-AFTER: scalar.body:
; NO-SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VAR3]] = add i64 0, 1
; NO-SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; NO-SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef
; NO-SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; NO-SINK-AFTER: for.end:
; NO-SINK-AFTER-NEXT: ret void
;
entry:
br label %scalar.body
scalar.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %scalar.body ]
%var2 = phi i64 [ 0, %entry ], [ %var3, %scalar.body ]
%var3 = add i64 0, 1
%i.next = add nuw nsw i64 %i, 1
%cond = icmp eq i64 %i.next, undef
br i1 %cond, label %for.end, label %scalar.body
for.end:
ret void
}
; We vectorize this first order recurrence, by generating two
; extracts for the phi `val.phi` - one at the last index and
; another at the second last index. We need these 2 extracts because
; the first order recurrence phi is used outside the loop, so we require the phi
; itself and not its update (addx).
; Check the case when unrolled but not vectorized.
define i32 @extract_second_last_iteration(i32* %cval, i32 %x) {
; CHECK-LABEL: @extract_second_last_iteration(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 2, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP1]] = add i32 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = add i32 [[TMP0]], [[X:%.*]]
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[VAL_PHI_LCSSA]]
;
; UNROLL-LABEL: @extract_second_last_iteration(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[TMP0:%.*]] = phi i32 [ 2, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NEXT: [[TMP1]] = add i32 [[TMP0]], 8
; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 4
; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = add i32 [[TMP3]], [[X:%.*]]
; UNROLL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL: for.body:
; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: ret i32 [[VAL_PHI_LCSSA]]
;
; UNROLL-NO-IC-LABEL: @extract_second_last_iteration(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64>
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64>
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP11]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT3]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP10]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4>
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; UNROLL-NO-IC-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, 96
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP11]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP11]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INC]] = add i32 [[INC_PHI]], 1
; UNROLL-NO-IC-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
; UNROLL-NO-IC-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]]
; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[VAL_PHI_LCSSA]]
;
; UNROLL-NO-VF-LABEL: @extract_second_last_iteration(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = zext i32 [[INDUCTION]] to i64
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = zext i32 [[INDUCTION1]] to i64
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i32 [[INDUCTION]], [[X:%.*]]
; UNROLL-NO-VF-NEXT: [[TMP3]] = add i32 [[INDUCTION1]], [[X]]
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; UNROLL-NO-VF-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, 96
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INC]] = add i32 [[INC_PHI]], 1
; UNROLL-NO-VF-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
; UNROLL-NO-VF-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]]
; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[VAL_PHI_LCSSA]]
;
; SINK-AFTER-LABEL: @extract_second_last_iteration(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
; SINK-AFTER-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64>
; SINK-AFTER-NEXT: [[TMP5]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; SINK-AFTER-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; SINK-AFTER-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, 96
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[INC]] = add i32 [[INC_PHI]], 1
; SINK-AFTER-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
; SINK-AFTER-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]]
; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: ret i32 [[VAL_PHI_LCSSA]]
;
; NO-SINK-AFTER-LABEL: @extract_second_last_iteration(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; NO-SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64>
; NO-SINK-AFTER-NEXT: [[TMP5]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; NO-SINK-AFTER-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, 96
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; NO-SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; NO-SINK-AFTER: for.body:
; NO-SINK-AFTER-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[INC]] = add i32 [[INC_PHI]], 1
; NO-SINK-AFTER-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
; NO-SINK-AFTER-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]]
; NO-SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
; NO-SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; NO-SINK-AFTER: for.end:
; NO-SINK-AFTER-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: ret i32 [[VAL_PHI_LCSSA]]
;
entry:
br label %for.body
for.body:
%inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%val.phi = phi i32 [ 0, %entry ], [ %addx, %for.body ]
%inc = add i32 %inc.phi, 1
%bc = zext i32 %inc.phi to i64
%addx = add i32 %inc.phi, %x
%cmp = icmp eq i32 %inc.phi, 95
br i1 %cmp, label %for.end, label %for.body
for.end:
ret i32 %val.phi
}
; We vectorize this first order recurrence, with a set of insertelements for
; each unrolled part. Make sure these insertelements are generated in-order,
; because the shuffle of the first order recurrence will be added after the
; insertelement of the last part UF - 1, assuming the latter appears after the
; insertelements of all other parts.
;
; int PR33613(double *b, double j, int d) {
; int a = 0;
; for(int i = 0; i < 10240; i++, b+=25) {
; double f = b[d]; // Scalarize to form insertelements
; if (j * f)
; a++;
; j = f;
; }
; return a;
; }
;
;
define i32 @PR33613(double* %b, double %j, i32 %d) {
; CHECK-LABEL: @PR33613(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i64 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 25
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[B:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 25
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 25
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 25
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = load double, double* [[TMP7]], align 8
; CHECK-NEXT: [[TMP12:%.*]] = load double, double* [[TMP8]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = load double, double* [[TMP9]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = load double, double* [[TMP10]], align 8
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x double> poison, double [[TMP11]], i64 0
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x double> [[TMP15]], double [[TMP12]], i64 1
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x double> [[TMP16]], double [[TMP13]], i64 2
; CHECK-NEXT: [[TMP18]] = insertelement <4 x double> [[TMP17]], double [[TMP14]], i64 3
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP17]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP20:%.*]] = fmul <4 x double> [[TMP19]], [[TMP18]]
; CHECK-NEXT: [[TMP21:%.*]] = fcmp une <4 x double> [[TMP20]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = zext <4 x i1> [[TMP21]] to <4 x i32>
; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI]], [[TMP22]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP23]])
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[A_1_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
;
; UNROLL-LABEL: @PR33613(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i64 3
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 25
; UNROLL-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[B:%.*]], i64 [[IDXPROM]]
; UNROLL-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 1
; UNROLL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 25
; UNROLL-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2
; UNROLL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 25
; UNROLL-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
; UNROLL-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3
; UNROLL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 25
; UNROLL-NEXT: [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
; UNROLL-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 4
; UNROLL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 25
; UNROLL-NEXT: [[NEXT_GEP5:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
; UNROLL-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 5
; UNROLL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 25
; UNROLL-NEXT: [[NEXT_GEP6:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
; UNROLL-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 6
; UNROLL-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 25
; UNROLL-NEXT: [[NEXT_GEP7:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
; UNROLL-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 7
; UNROLL-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 25
; UNROLL-NEXT: [[NEXT_GEP8:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[TMP0]]
; UNROLL-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[TMP2]]
; UNROLL-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[TMP4]]
; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[TMP6]]
; UNROLL-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP5]], i64 [[TMP8]]
; UNROLL-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP6]], i64 [[TMP10]]
; UNROLL-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP7]], i64 [[TMP12]]
; UNROLL-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP8]], i64 [[TMP14]]
; UNROLL-NEXT: [[TMP23:%.*]] = load double, double* [[TMP15]], align 8
; UNROLL-NEXT: [[TMP24:%.*]] = load double, double* [[TMP16]], align 8
; UNROLL-NEXT: [[TMP25:%.*]] = load double, double* [[TMP17]], align 8
; UNROLL-NEXT: [[TMP26:%.*]] = load double, double* [[TMP18]], align 8
; UNROLL-NEXT: [[TMP27:%.*]] = insertelement <4 x double> poison, double [[TMP23]], i64 0
; UNROLL-NEXT: [[TMP28:%.*]] = insertelement <4 x double> [[TMP27]], double [[TMP24]], i64 1
; UNROLL-NEXT: [[TMP29:%.*]] = insertelement <4 x double> [[TMP28]], double [[TMP25]], i64 2
; UNROLL-NEXT: [[TMP30:%.*]] = insertelement <4 x double> [[TMP29]], double [[TMP26]], i64 3
; UNROLL-NEXT: [[TMP31:%.*]] = load double, double* [[TMP19]], align 8
; UNROLL-NEXT: [[TMP32:%.*]] = load double, double* [[TMP20]], align 8
; UNROLL-NEXT: [[TMP33:%.*]] = load double, double* [[TMP21]], align 8
; UNROLL-NEXT: [[TMP34:%.*]] = load double, double* [[TMP22]], align 8
; UNROLL-NEXT: [[TMP35:%.*]] = insertelement <4 x double> poison, double [[TMP31]], i64 0
; UNROLL-NEXT: [[TMP36:%.*]] = insertelement <4 x double> [[TMP35]], double [[TMP32]], i64 1
; UNROLL-NEXT: [[TMP37:%.*]] = insertelement <4 x double> [[TMP36]], double [[TMP33]], i64 2
; UNROLL-NEXT: [[TMP38]] = insertelement <4 x double> [[TMP37]], double [[TMP34]], i64 3
; UNROLL-NEXT: [[TMP39:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP29]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP40:%.*]] = shufflevector <4 x double> [[TMP30]], <4 x double> [[TMP37]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP41:%.*]] = fmul <4 x double> [[TMP39]], [[TMP30]]
; UNROLL-NEXT: [[TMP42:%.*]] = fmul <4 x double> [[TMP40]], [[TMP38]]
; UNROLL-NEXT: [[TMP43:%.*]] = fcmp une <4 x double> [[TMP41]], zeroinitializer
; UNROLL-NEXT: [[TMP44:%.*]] = fcmp une <4 x double> [[TMP42]], zeroinitializer
; UNROLL-NEXT: [[TMP45:%.*]] = zext <4 x i1> [[TMP43]] to <4 x i32>
; UNROLL-NEXT: [[TMP46:%.*]] = zext <4 x i1> [[TMP44]] to <4 x i32>
; UNROLL-NEXT: [[TMP47]] = add <4 x i32> [[VEC_PHI]], [[TMP45]]
; UNROLL-NEXT: [[TMP48]] = add <4 x i32> [[VEC_PHI9]], [[TMP46]]
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
; UNROLL-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP48]], [[TMP47]]
; UNROLL-NEXT: [[TMP50:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; UNROLL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL: for.cond.cleanup:
; UNROLL-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: ret i32 [[A_1_LCSSA]]
; UNROLL: for.body:
; UNROLL-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
;
; UNROLL-NO-IC-LABEL: @PR33613(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 25
; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 25
; UNROLL-NO-IC-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 25
; UNROLL-NO-IC-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 25
; UNROLL-NO-IC-NEXT: [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 25
; UNROLL-NO-IC-NEXT: [[NEXT_GEP5:%.*]] = getelementptr double, double* [[B]], i64 [[TMP9]]
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 25
; UNROLL-NO-IC-NEXT: [[NEXT_GEP6:%.*]] = getelementptr double, double* [[B]], i64 [[TMP11]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 6
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 25
; UNROLL-NO-IC-NEXT: [[NEXT_GEP7:%.*]] = getelementptr double, double* [[B]], i64 [[TMP13]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 7
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 25
; UNROLL-NO-IC-NEXT: [[NEXT_GEP8:%.*]] = getelementptr double, double* [[B]], i64 [[TMP15]]
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 4
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP5]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP6]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP7]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP8]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load double, double* [[TMP18]], align 8
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load double, double* [[TMP19]], align 8
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load double, double* [[TMP20]], align 8
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = load double, double* [[TMP21]], align 8
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <4 x double> poison, double [[TMP26]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = insertelement <4 x double> [[TMP30]], double [[TMP27]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = insertelement <4 x double> [[TMP31]], double [[TMP28]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = insertelement <4 x double> [[TMP32]], double [[TMP29]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = load double, double* [[TMP22]], align 8
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = load double, double* [[TMP23]], align 8
; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = load double, double* [[TMP24]], align 8
; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = load double, double* [[TMP25]], align 8
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = insertelement <4 x double> poison, double [[TMP34]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = insertelement <4 x double> [[TMP38]], double [[TMP35]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = insertelement <4 x double> [[TMP39]], double [[TMP36]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP41]] = insertelement <4 x double> [[TMP40]], double [[TMP37]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP33]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = shufflevector <4 x double> [[TMP33]], <4 x double> [[TMP41]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = fmul <4 x double> [[TMP42]], [[TMP33]]
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = fmul <4 x double> [[TMP43]], [[TMP41]]
; UNROLL-NO-IC-NEXT: [[TMP46:%.*]] = fcmp une <4 x double> [[TMP44]], zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP47:%.*]] = fcmp une <4 x double> [[TMP45]], zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = zext <4 x i1> [[TMP46]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = zext <4 x i1> [[TMP47]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP50]] = add <4 x i32> [[VEC_PHI]], [[TMP48]]
; UNROLL-NO-IC-NEXT: [[TMP51]] = add <4 x i32> [[VEC_PHI9]], [[TMP49]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
; UNROLL-NO-IC-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP51]], [[TMP50]]
; UNROLL-NO-IC-NEXT: [[TMP53:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 10240, 10240
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP41]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[TMP41]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP53]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.cond.cleanup:
; UNROLL-NO-IC-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP53]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[A_1_LCSSA]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP54:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP54]] = load double, double* [[ARRAYIDX]], align 8
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP54]]
; UNROLL-NO-IC-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
; UNROLL-NO-IC-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
; UNROLL-NO-IC-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]]
; UNROLL-NO-IC-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1
; UNROLL-NO-IC-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
;
; UNROLL-NO-VF-LABEL: @PR33613(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi double [ [[J:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 25
; UNROLL-NO-VF-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 25
; UNROLL-NO-VF-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION3:%.*]] = add i32 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = load double, double* [[TMP4]], align 8
; UNROLL-NO-VF-NEXT: [[TMP7]] = load double, double* [[TMP5]], align 8
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = fmul double [[VECTOR_RECUR]], [[TMP6]]
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = fmul double [[TMP6]], [[TMP7]]
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = fcmp une double [[TMP8]], 0.000000e+00
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = fcmp une double [[TMP9]], 0.000000e+00
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = zext i1 [[TMP10]] to i32
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = zext i1 [[TMP11]] to i32
; UNROLL-NO-VF-NEXT: [[TMP14]] = add i32 [[VEC_PHI]], [[TMP12]]
; UNROLL-NO-VF-NEXT: [[TMP15]] = add i32 [[VEC_PHI4]], [[TMP13]]
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP14]]
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 10240, 10240
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.cond.cleanup:
; UNROLL-NO-VF-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[A_1_LCSSA]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
; UNROLL-NO-VF-NEXT: [[TMP17]] = load double, double* [[ARRAYIDX]], align 8
; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP17]]
; UNROLL-NO-VF-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
; UNROLL-NO-VF-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
; UNROLL-NO-VF-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]]
; UNROLL-NO-VF-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1
; UNROLL-NO-VF-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
;
; SINK-AFTER-LABEL: @PR33613(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 25
; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; SINK-AFTER-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 25
; SINK-AFTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2
; SINK-AFTER-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 25
; SINK-AFTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[TMP5]]
; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3
; SINK-AFTER-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 25
; SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
; SINK-AFTER-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]]
; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[IDXPROM]]
; SINK-AFTER-NEXT: [[TMP13:%.*]] = load double, double* [[TMP9]], align 8
; SINK-AFTER-NEXT: [[TMP14:%.*]] = load double, double* [[TMP10]], align 8
; SINK-AFTER-NEXT: [[TMP15:%.*]] = load double, double* [[TMP11]], align 8
; SINK-AFTER-NEXT: [[TMP16:%.*]] = load double, double* [[TMP12]], align 8
; SINK-AFTER-NEXT: [[TMP17:%.*]] = insertelement <4 x double> poison, double [[TMP13]], i32 0
; SINK-AFTER-NEXT: [[TMP18:%.*]] = insertelement <4 x double> [[TMP17]], double [[TMP14]], i32 1
; SINK-AFTER-NEXT: [[TMP19:%.*]] = insertelement <4 x double> [[TMP18]], double [[TMP15]], i32 2
; SINK-AFTER-NEXT: [[TMP20]] = insertelement <4 x double> [[TMP19]], double [[TMP16]], i32 3
; SINK-AFTER-NEXT: [[TMP21:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP20]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP22:%.*]] = fmul <4 x double> [[TMP21]], [[TMP20]]
; SINK-AFTER-NEXT: [[TMP23:%.*]] = fcmp une <4 x double> [[TMP22]], zeroinitializer
; SINK-AFTER-NEXT: [[TMP24:%.*]] = zext <4 x i1> [[TMP23]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]]
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 10240, 10240
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP20]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[TMP20]], i32 2
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.cond.cleanup:
; SINK-AFTER-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: ret i32 [[A_1_LCSSA]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
; SINK-AFTER-NEXT: [[TMP28]] = load double, double* [[ARRAYIDX]], align 8
; SINK-AFTER-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP28]]
; SINK-AFTER-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
; SINK-AFTER-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
; SINK-AFTER-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]]
; SINK-AFTER-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1
; SINK-AFTER-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
;
; NO-SINK-AFTER-LABEL: @PR33613(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
; NO-SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i32 3
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 25
; NO-SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 25
; NO-SINK-AFTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 25
; NO-SINK-AFTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[TMP5]]
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 25
; NO-SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]]
; NO-SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 0
; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]]
; NO-SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[IDXPROM]]
; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = load double, double* [[TMP9]], align 8
; NO-SINK-AFTER-NEXT: [[TMP14:%.*]] = load double, double* [[TMP10]], align 8
; NO-SINK-AFTER-NEXT: [[TMP15:%.*]] = load double, double* [[TMP11]], align 8
; NO-SINK-AFTER-NEXT: [[TMP16:%.*]] = load double, double* [[TMP12]], align 8
; NO-SINK-AFTER-NEXT: [[TMP17:%.*]] = insertelement <4 x double> poison, double [[TMP13]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP18:%.*]] = insertelement <4 x double> [[TMP17]], double [[TMP14]], i32 1
; NO-SINK-AFTER-NEXT: [[TMP19:%.*]] = insertelement <4 x double> [[TMP18]], double [[TMP15]], i32 2
; NO-SINK-AFTER-NEXT: [[TMP20]] = insertelement <4 x double> [[TMP19]], double [[TMP16]], i32 3
; NO-SINK-AFTER-NEXT: [[TMP21:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP20]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP22:%.*]] = fmul <4 x double> [[TMP21]], [[TMP20]]
; NO-SINK-AFTER-NEXT: [[TMP23:%.*]] = fcmp une <4 x double> [[TMP22]], zeroinitializer
; NO-SINK-AFTER-NEXT: [[TMP24:%.*]] = zext <4 x i1> [[TMP23]] to <4 x i32>
; NO-SINK-AFTER-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]]
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
; NO-SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 10240, 10240
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP20]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[TMP20]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; NO-SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; NO-SINK-AFTER: for.cond.cleanup:
; NO-SINK-AFTER-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: ret i32 [[A_1_LCSSA]]
; NO-SINK-AFTER: for.body:
; NO-SINK-AFTER-NEXT: [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
; NO-SINK-AFTER-NEXT: [[TMP28]] = load double, double* [[ARRAYIDX]], align 8
; NO-SINK-AFTER-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP28]]
; NO-SINK-AFTER-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
; NO-SINK-AFTER-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
; NO-SINK-AFTER-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]]
; NO-SINK-AFTER-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1
; NO-SINK-AFTER-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
; NO-SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
; NO-SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
;
entry:
%idxprom = sext i32 %d to i64
br label %for.body
for.cond.cleanup:
%a.1.lcssa = phi i32 [ %a.1, %for.body ]
ret i32 %a.1.lcssa
for.body:
%b.addr.012 = phi double* [ %b, %entry ], [ %add.ptr, %for.body ]
%i.011 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
%a.010 = phi i32 [ 0, %entry ], [ %a.1, %for.body ]
%j.addr.09 = phi double [ %j, %entry ], [ %0, %for.body ]
%arrayidx = getelementptr inbounds double, double* %b.addr.012, i64 %idxprom
%0 = load double, double* %arrayidx, align 8
%mul = fmul double %j.addr.09, %0
%tobool = fcmp une double %mul, 0.000000e+00
%inc = zext i1 %tobool to i32
%a.1 = add nsw i32 %a.010, %inc
%inc1 = add nuw nsw i32 %i.011, 1
%add.ptr = getelementptr inbounds double, double* %b.addr.012, i64 25
%exitcond = icmp eq i32 %inc1, 10240
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; void sink_after(short *a, int n, int *b) {
; for(int i = 0; i < n; i++)
; b[i] = (a[i] * a[i + 1]);
; }
;
; Check that the sext sank after the load in the vector loop.
;
define void @sink_after(i16* %a, i32* %b, i64 %n) {
; CHECK-LABEL: @sink_after(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32*
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !26
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP11]], align 4, !alias.scope !29, !noalias !26
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP13]] = load i16, i16* [[ARRAYIDX2]], align 2
; CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; UNROLL-LABEL: @sink_after(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL: vector.memcheck:
; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
; UNROLL-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; UNROLL-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; UNROLL-NEXT: [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32*
; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]]
; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]]
; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8
; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1
; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !26
; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i64 4
; UNROLL-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
; UNROLL-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2, !alias.scope !26
; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP10:%.*]] = sext <4 x i16> [[TMP8]] to <4 x i32>
; UNROLL-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[TMP9]] to <4 x i32>
; UNROLL-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; UNROLL-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32>
; UNROLL-NEXT: [[TMP14:%.*]] = mul nsw <4 x i32> [[TMP12]], [[TMP10]]
; UNROLL-NEXT: [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP11]]
; UNROLL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; UNROLL-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP17]], align 4, !alias.scope !29, !noalias !26
; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i64 4
; UNROLL-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP19]], align 4, !alias.scope !29, !noalias !26
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i64 3
; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL: for.body:
; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[FOR_BODY]] ]
; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NEXT: [[TMP21]] = load i16, i16* [[ARRAYIDX2]], align 2
; UNROLL-NEXT: [[CONV3:%.*]] = sext i16 [[TMP21]] to i32
; UNROLL-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; UNROLL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
; UNROLL-NO-IC-LABEL: @sink_after(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL-NO-IC: vector.memcheck:
; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; UNROLL-NO-IC-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; UNROLL-NO-IC-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; UNROLL-NO-IC-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NO-IC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP2]], 1
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <4 x i16>*
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP8]], align 2, !alias.scope !26
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !26
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[TMP12]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP13]]
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul nsw <4 x i32> [[TMP16]], [[TMP14]]
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP22]], align 4, !alias.scope !29, !noalias !26
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP24]], align 4, !alias.scope !29, !noalias !26
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-IC-NEXT: [[TMP26]] = load i16, i16* [[ARRAYIDX2]], align 2
; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP26]] to i32
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @sink_after(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL-NO-VF: vector.memcheck:
; UNROLL-NO-VF-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; UNROLL-NO-VF-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; UNROLL-NO-VF-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; UNROLL-NO-VF-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; UNROLL-NO-VF-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; UNROLL-NO-VF-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
; UNROLL-NO-VF-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; UNROLL-NO-VF-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; UNROLL-NO-VF-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NO-VF-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[INDUCTION7]], 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 2, !alias.scope !25
; UNROLL-NO-VF-NEXT: [[TMP6]] = load i16, i16* [[TMP4]], align 2, !alias.scope !25
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[TMP5]] to i32
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP5]] to i32
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sext i16 [[TMP6]] to i32
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = mul nsw i32 [[TMP9]], [[TMP7]]
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = mul nsw i32 [[TMP10]], [[TMP8]]
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]]
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]]
; UNROLL-NO-VF-NEXT: store i32 [[TMP11]], i32* [[TMP13]], align 4, !alias.scope !28, !noalias !25
; UNROLL-NO-VF-NEXT: store i32 [[TMP12]], i32* [[TMP14]], align 4, !alias.scope !28, !noalias !25
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-VF-NEXT: [[TMP16]] = load i16, i16* [[ARRAYIDX2]], align 2
; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP16]] to i32
; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @sink_after(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; SINK-AFTER: vector.memcheck:
; SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; SINK-AFTER-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; SINK-AFTER-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; SINK-AFTER-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; SINK-AFTER-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
; SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; SINK-AFTER-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
; SINK-AFTER-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !26
; SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP7]]
; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
; SINK-AFTER-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
; SINK-AFTER-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP12]], align 4, !alias.scope !29, !noalias !26
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; SINK-AFTER-NEXT: [[TMP14]] = load i16, i16* [[ARRAYIDX2]], align 2
; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32
; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
; NO-SINK-AFTER-LABEL: @sink_after(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; NO-SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; NO-SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; NO-SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; NO-SINK-AFTER: vector.memcheck:
; NO-SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; NO-SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; NO-SINK-AFTER-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; NO-SINK-AFTER-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; NO-SINK-AFTER-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; NO-SINK-AFTER-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
; NO-SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; NO-SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; NO-SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; NO-SINK-AFTER-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; NO-SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
; NO-SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !26
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP7]]
; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
; NO-SINK-AFTER-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP12]], align 4, !alias.scope !29, !noalias !26
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; NO-SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; NO-SINK-AFTER: for.body:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; NO-SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; NO-SINK-AFTER-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; NO-SINK-AFTER-NEXT: [[TMP14]] = load i16, i16* [[ARRAYIDX2]], align 2
; NO-SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32
; NO-SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; NO-SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; NO-SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; NO-SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; NO-SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; NO-SINK-AFTER: for.end:
; NO-SINK-AFTER-NEXT: ret void
;
entry:
%.pre = load i16, i16* %a
br label %for.body
for.body:
%0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%conv = sext i16 %0 to i32
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%arrayidx2 = getelementptr inbounds i16, i16* %a, i64 %indvars.iv.next
%1 = load i16, i16* %arrayidx2
%conv3 = sext i16 %1 to i32
%mul = mul nsw i32 %conv3, %conv
%arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
store i32 %mul, i32* %arrayidx5
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; PR34711: given three consecutive instructions such that the first will be
; widened, the second is a cast that will be widened and needs to sink after the
; third, and the third is a first-order-recurring load that will be replicated
; instead of widened. Although the cast and the first instruction will both be
; widened, and are originally adjacent to each other, make sure the replicated
; load ends up appearing between them.
;
; void PR34711(short[2] *a, int *b, int *c, int n) {
; for(int i = 0; i < n; i++) {
; c[i] = 7;
; b[i] = (a[i][0] * a[i][1]);
; }
; }
;
; Check that the sext sank after the load in the vector loop.
;
define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {
; CHECK-LABEL: @PR34711(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[C:%.*]], i64 [[N]]
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 0, i64 1
; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[C]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[SCEVGEP8]] to i32*
; CHECK-NEXT: [[BOUND010:%.*]] = icmp ugt i32* [[TMP0]], [[C]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
; CHECK-NEXT: [[BOUND111:%.*]] = icmp ult i16* [[SCEVGEP6]], [[TMP1]]
; CHECK-NEXT: [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[SCEVGEP8]] to i32*
; CHECK-NEXT: [[BOUND013:%.*]] = icmp ugt i32* [[TMP2]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[SCEVGEP4]] to i16*
; CHECK-NEXT: [[BOUND114:%.*]] = icmp ult i16* [[SCEVGEP6]], [[TMP3]]
; CHECK-NEXT: [[FOUND_CONFLICT15:%.*]] = and i1 [[BOUND013]], [[BOUND114]]
; CHECK-NEXT: [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT15]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDEX]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP12]], align 4, !alias.scope !33, !noalias !36
; CHECK-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP8]], align 2, !alias.scope !39
; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP9]], align 2, !alias.scope !39
; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP10]], align 2, !alias.scope !39
; CHECK-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP11]], align 2, !alias.scope !39
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i64 1
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i16> [[TMP18]], i16 [[TMP15]], i64 2
; CHECK-NEXT: [[TMP20]] = insertelement <4 x i16> [[TMP19]], i16 [[TMP16]], i64 3
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP19]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i16> [[TMP21]] to <4 x i32>
; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i16> [[TMP20]] to <4 x i32>
; CHECK-NEXT: [[TMP24:%.*]] = mul nsw <4 x i32> [[TMP23]], [[TMP22]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP24]], <4 x i32>* [[TMP26]], align 4, !alias.scope !40, !noalias !39
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
; CHECK-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; CHECK-NEXT: [[TMP28]] = load i16, i16* [[CUR_INDEX]], align 2
; CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP28]] to i32
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; UNROLL-LABEL: @PR34711(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
; UNROLL-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL: vector.memcheck:
; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[C:%.*]], i64 [[N]]
; UNROLL-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
; UNROLL-NEXT: [[SCEVGEP6:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 0, i64 1
; UNROLL-NEXT: [[SCEVGEP8:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[C]]
; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NEXT: [[TMP0:%.*]] = bitcast i16* [[SCEVGEP8]] to i32*
; UNROLL-NEXT: [[BOUND010:%.*]] = icmp ugt i32* [[TMP0]], [[C]]
; UNROLL-NEXT: [[TMP1:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
; UNROLL-NEXT: [[BOUND111:%.*]] = icmp ult i16* [[SCEVGEP6]], [[TMP1]]
; UNROLL-NEXT: [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
; UNROLL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i16* [[SCEVGEP8]] to i32*
; UNROLL-NEXT: [[BOUND013:%.*]] = icmp ugt i32* [[TMP2]], [[B]]
; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i32* [[SCEVGEP4]] to i16*
; UNROLL-NEXT: [[BOUND114:%.*]] = icmp ult i16* [[SCEVGEP6]], [[TMP3]]
; UNROLL-NEXT: [[FOUND_CONFLICT15:%.*]] = and i1 [[BOUND013]], [[BOUND114]]
; UNROLL-NEXT: [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT15]]
; UNROLL-NEXT: br i1 [[CONFLICT_RDX16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8
; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 1
; UNROLL-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 2
; UNROLL-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 3
; UNROLL-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 4
; UNROLL-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 5
; UNROLL-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 6
; UNROLL-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 7
; UNROLL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]]
; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDEX]], i64 1
; UNROLL-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1
; UNROLL-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1
; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1
; UNROLL-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP7]], i64 1
; UNROLL-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP8]], i64 1
; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP9]], i64 1
; UNROLL-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP10]], i64 1
; UNROLL-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP20]], align 4, !alias.scope !33, !noalias !36
; UNROLL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i64 4
; UNROLL-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP22]], align 4, !alias.scope !33, !noalias !36
; UNROLL-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP12]], align 2, !alias.scope !39
; UNROLL-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP13]], align 2, !alias.scope !39
; UNROLL-NEXT: [[TMP25:%.*]] = load i16, i16* [[TMP14]], align 2, !alias.scope !39
; UNROLL-NEXT: [[TMP26:%.*]] = load i16, i16* [[TMP15]], align 2, !alias.scope !39
; UNROLL-NEXT: [[TMP27:%.*]] = insertelement <4 x i16> poison, i16 [[TMP23]], i64 0
; UNROLL-NEXT: [[TMP28:%.*]] = insertelement <4 x i16> [[TMP27]], i16 [[TMP24]], i64 1
; UNROLL-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> [[TMP28]], i16 [[TMP25]], i64 2
; UNROLL-NEXT: [[TMP30:%.*]] = insertelement <4 x i16> [[TMP29]], i16 [[TMP26]], i64 3
; UNROLL-NEXT: [[TMP31:%.*]] = load i16, i16* [[TMP16]], align 2, !alias.scope !39
; UNROLL-NEXT: [[TMP32:%.*]] = load i16, i16* [[TMP17]], align 2, !alias.scope !39
; UNROLL-NEXT: [[TMP33:%.*]] = load i16, i16* [[TMP18]], align 2, !alias.scope !39
; UNROLL-NEXT: [[TMP34:%.*]] = load i16, i16* [[TMP19]], align 2, !alias.scope !39
; UNROLL-NEXT: [[TMP35:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i64 0
; UNROLL-NEXT: [[TMP36:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP32]], i64 1
; UNROLL-NEXT: [[TMP37:%.*]] = insertelement <4 x i16> [[TMP36]], i16 [[TMP33]], i64 2
; UNROLL-NEXT: [[TMP38]] = insertelement <4 x i16> [[TMP37]], i16 [[TMP34]], i64 3
; UNROLL-NEXT: [[TMP39:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP29]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP40:%.*]] = shufflevector <4 x i16> [[TMP30]], <4 x i16> [[TMP37]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP41:%.*]] = sext <4 x i16> [[TMP39]] to <4 x i32>
; UNROLL-NEXT: [[TMP42:%.*]] = sext <4 x i16> [[TMP40]] to <4 x i32>
; UNROLL-NEXT: [[TMP43:%.*]] = sext <4 x i16> [[TMP30]] to <4 x i32>
; UNROLL-NEXT: [[TMP44:%.*]] = sext <4 x i16> [[TMP38]] to <4 x i32>
; UNROLL-NEXT: [[TMP45:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP41]]
; UNROLL-NEXT: [[TMP46:%.*]] = mul nsw <4 x i32> [[TMP44]], [[TMP42]]
; UNROLL-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; UNROLL-NEXT: [[TMP48:%.*]] = bitcast i32* [[TMP47]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP48]], align 4, !alias.scope !40, !noalias !39
; UNROLL-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, i32* [[TMP47]], i64 4
; UNROLL-NEXT: [[TMP50:%.*]] = bitcast i32* [[TMP49]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> [[TMP46]], <4 x i32>* [[TMP50]], align 4, !alias.scope !40, !noalias !39
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL: for.body:
; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP52:%.*]], [[FOR_BODY]] ]
; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
; UNROLL-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
; UNROLL-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4
; UNROLL-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; UNROLL-NEXT: [[TMP52]] = load i16, i16* [[CUR_INDEX]], align 2
; UNROLL-NEXT: [[CONV3:%.*]] = sext i16 [[TMP52]] to i32
; UNROLL-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; UNROLL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
; UNROLL-NO-IC-LABEL: @PR34711(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[C1:%.*]] = bitcast i32* [[C:%.*]] to i8*
; UNROLL-NO-IC-NEXT: [[B3:%.*]] = bitcast i32* [[B:%.*]] to i8*
; UNROLL-NO-IC-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL-NO-IC: vector.memcheck:
; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[C]], i64 [[N]]
; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; UNROLL-NO-IC-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; UNROLL-NO-IC-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; UNROLL-NO-IC-NEXT: [[SCEVGEP6:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 0, i64 1
; UNROLL-NO-IC-NEXT: [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
; UNROLL-NO-IC-NEXT: [[SCEVGEP8:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
; UNROLL-NO-IC-NEXT: [[SCEVGEP89:%.*]] = bitcast i16* [[SCEVGEP8]] to i8*
; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP45]]
; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP2]]
; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NO-IC-NEXT: [[BOUND010:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP89]]
; UNROLL-NO-IC-NEXT: [[BOUND111:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP2]]
; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
; UNROLL-NO-IC-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
; UNROLL-NO-IC-NEXT: [[BOUND013:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP89]]
; UNROLL-NO-IC-NEXT: [[BOUND114:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP45]]
; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT15:%.*]] = and i1 [[BOUND013]], [[BOUND114]]
; UNROLL-NO-IC-NEXT: [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT15]]
; UNROLL-NO-IC-NEXT: br i1 [[CONFLICT_RDX16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP7]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP19]], align 4, !alias.scope !33, !noalias !36
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP21]], align 4, !alias.scope !33, !noalias !36
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = load i16, i16* [[TMP10]], align 2, !alias.scope !39
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP11]], align 2, !alias.scope !39
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP12]], align 2, !alias.scope !39
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load i16, i16* [[TMP13]], align 2, !alias.scope !39
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> poison, i16 [[TMP22]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP23]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = insertelement <4 x i16> [[TMP27]], i16 [[TMP24]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> [[TMP28]], i16 [[TMP25]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = load i16, i16* [[TMP14]], align 2, !alias.scope !39
; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = load i16, i16* [[TMP15]], align 2, !alias.scope !39
; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = load i16, i16* [[TMP16]], align 2, !alias.scope !39
; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = load i16, i16* [[TMP17]], align 2, !alias.scope !39
; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = insertelement <4 x i16> poison, i16 [[TMP30]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = insertelement <4 x i16> [[TMP34]], i16 [[TMP31]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP32]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP37]] = insertelement <4 x i16> [[TMP36]], i16 [[TMP33]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP29]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = shufflevector <4 x i16> [[TMP29]], <4 x i16> [[TMP37]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = sext <4 x i16> [[TMP38]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = sext <4 x i16> [[TMP39]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = sext <4 x i16> [[TMP29]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = sext <4 x i16> [[TMP37]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = mul nsw <4 x i32> [[TMP42]], [[TMP40]]
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP41]]
; UNROLL-NO-IC-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[TMP46]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP44]], <4 x i32>* [[TMP49]], align 4, !alias.scope !40, !noalias !39
; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[TMP46]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = bitcast i32* [[TMP50]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP51]], align 4, !alias.scope !40, !noalias !39
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP37]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP37]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP53:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
; UNROLL-NO-IC-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4
; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; UNROLL-NO-IC-NEXT: [[TMP53]] = load i16, i16* [[CUR_INDEX]], align 2
; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP53]] to i32
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @PR34711(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[C1:%.*]] = bitcast i32* [[C:%.*]] to i8*
; UNROLL-NO-VF-NEXT: [[B3:%.*]] = bitcast i32* [[B:%.*]] to i8*
; UNROLL-NO-VF-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL-NO-VF: vector.memcheck:
; UNROLL-NO-VF-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[C]], i64 [[N]]
; UNROLL-NO-VF-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; UNROLL-NO-VF-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; UNROLL-NO-VF-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; UNROLL-NO-VF-NEXT: [[SCEVGEP6:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 0, i64 1
; UNROLL-NO-VF-NEXT: [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
; UNROLL-NO-VF-NEXT: [[SCEVGEP8:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
; UNROLL-NO-VF-NEXT: [[SCEVGEP89:%.*]] = bitcast i16* [[SCEVGEP8]] to i8*
; UNROLL-NO-VF-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP45]]
; UNROLL-NO-VF-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP2]]
; UNROLL-NO-VF-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NO-VF-NEXT: [[BOUND010:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP89]]
; UNROLL-NO-VF-NEXT: [[BOUND111:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP2]]
; UNROLL-NO-VF-NEXT: [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
; UNROLL-NO-VF-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
; UNROLL-NO-VF-NEXT: [[BOUND013:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP89]]
; UNROLL-NO-VF-NEXT: [[BOUND114:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP45]]
; UNROLL-NO-VF-NEXT: [[FOUND_CONFLICT15:%.*]] = and i1 [[BOUND013]], [[BOUND114]]
; UNROLL-NO-VF-NEXT: [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT15]]
; UNROLL-NO-VF-NEXT: br i1 [[CONFLICT_RDX16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION17:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION]]
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION17]]
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION]], i64 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION17]], i64 1
; UNROLL-NO-VF-NEXT: store i32 7, i32* [[TMP0]], align 4, !alias.scope !32, !noalias !35
; UNROLL-NO-VF-NEXT: store i32 7, i32* [[TMP1]], align 4, !alias.scope !32, !noalias !35
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2, !alias.scope !38
; UNROLL-NO-VF-NEXT: [[TMP5]] = load i16, i16* [[TMP3]], align 2, !alias.scope !38
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sext i16 [[TMP4]] to i32
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[TMP4]] to i32
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP5]] to i32
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP8]], [[TMP6]]
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = mul nsw i32 [[TMP9]], [[TMP7]]
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]]
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION17]]
; UNROLL-NO-VF-NEXT: store i32 [[TMP10]], i32* [[TMP12]], align 4, !alias.scope !39, !noalias !38
; UNROLL-NO-VF-NEXT: store i32 [[TMP11]], i32* [[TMP13]], align 4, !alias.scope !39, !noalias !38
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
; UNROLL-NO-VF-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4
; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; UNROLL-NO-VF-NEXT: [[TMP15]] = load i16, i16* [[CUR_INDEX]], align 2
; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP15]] to i32
; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @PR34711(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[C1:%.*]] = bitcast i32* [[C:%.*]] to i8*
; SINK-AFTER-NEXT: [[B3:%.*]] = bitcast i32* [[B:%.*]] to i8*
; SINK-AFTER-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; SINK-AFTER: vector.memcheck:
; SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[C]], i64 [[N]]
; SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; SINK-AFTER-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; SINK-AFTER-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; SINK-AFTER-NEXT: [[SCEVGEP6:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 0, i64 1
; SINK-AFTER-NEXT: [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
; SINK-AFTER-NEXT: [[SCEVGEP8:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
; SINK-AFTER-NEXT: [[SCEVGEP89:%.*]] = bitcast i16* [[SCEVGEP8]] to i8*
; SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP45]]
; SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP2]]
; SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; SINK-AFTER-NEXT: [[BOUND010:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP89]]
; SINK-AFTER-NEXT: [[BOUND111:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP2]]
; SINK-AFTER-NEXT: [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
; SINK-AFTER-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
; SINK-AFTER-NEXT: [[BOUND013:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP89]]
; SINK-AFTER-NEXT: [[BOUND114:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP45]]
; SINK-AFTER-NEXT: [[FOUND_CONFLICT15:%.*]] = and i1 [[BOUND013]], [[BOUND114]]
; SINK-AFTER-NEXT: [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT15]]
; SINK-AFTER-NEXT: br i1 [[CONFLICT_RDX16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP0]]
; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1
; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1
; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1
; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1
; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; SINK-AFTER-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; SINK-AFTER-NEXT: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP10]], align 4, !alias.scope !33, !noalias !36
; SINK-AFTER-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP5]], align 2, !alias.scope !39
; SINK-AFTER-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP6]], align 2, !alias.scope !39
; SINK-AFTER-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP7]], align 2, !alias.scope !39
; SINK-AFTER-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP8]], align 2, !alias.scope !39
; SINK-AFTER-NEXT: [[TMP15:%.*]] = insertelement <4 x i16> poison, i16 [[TMP11]], i32 0
; SINK-AFTER-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i32 1
; SINK-AFTER-NEXT: [[TMP17:%.*]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 2
; SINK-AFTER-NEXT: [[TMP18]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i32 3
; SINK-AFTER-NEXT: [[TMP19:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP18]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP20:%.*]] = sext <4 x i16> [[TMP19]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP21:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP22:%.*]] = mul nsw <4 x i32> [[TMP21]], [[TMP20]]
; SINK-AFTER-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
; SINK-AFTER-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0
; SINK-AFTER-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>*
; SINK-AFTER-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP25]], align 4, !alias.scope !40, !noalias !39
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP18]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP18]], i32 2
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
; SINK-AFTER-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4
; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; SINK-AFTER-NEXT: [[TMP27]] = load i16, i16* [[CUR_INDEX]], align 2
; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP27]] to i32
; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
; NO-SINK-AFTER-LABEL: @PR34711(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: [[C1:%.*]] = bitcast i32* [[C:%.*]] to i8*
; NO-SINK-AFTER-NEXT: [[B3:%.*]] = bitcast i32* [[B:%.*]] to i8*
; NO-SINK-AFTER-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
; NO-SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
; NO-SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; NO-SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; NO-SINK-AFTER: vector.memcheck:
; NO-SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[C]], i64 [[N]]
; NO-SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; NO-SINK-AFTER-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; NO-SINK-AFTER-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; NO-SINK-AFTER-NEXT: [[SCEVGEP6:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 0, i64 1
; NO-SINK-AFTER-NEXT: [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
; NO-SINK-AFTER-NEXT: [[SCEVGEP8:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
; NO-SINK-AFTER-NEXT: [[SCEVGEP89:%.*]] = bitcast i16* [[SCEVGEP8]] to i8*
; NO-SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP45]]
; NO-SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP2]]
; NO-SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; NO-SINK-AFTER-NEXT: [[BOUND010:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP89]]
; NO-SINK-AFTER-NEXT: [[BOUND111:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP2]]
; NO-SINK-AFTER-NEXT: [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
; NO-SINK-AFTER-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
; NO-SINK-AFTER-NEXT: [[BOUND013:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP89]]
; NO-SINK-AFTER-NEXT: [[BOUND114:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP45]]
; NO-SINK-AFTER-NEXT: [[FOUND_CONFLICT15:%.*]] = and i1 [[BOUND013]], [[BOUND114]]
; NO-SINK-AFTER-NEXT: [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT15]]
; NO-SINK-AFTER-NEXT: br i1 [[CONFLICT_RDX16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; NO-SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP0]]
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1
; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1
; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; NO-SINK-AFTER-NEXT: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP10]], align 4, !alias.scope !33, !noalias !36
; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP5]], align 2, !alias.scope !39
; NO-SINK-AFTER-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP6]], align 2, !alias.scope !39
; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP7]], align 2, !alias.scope !39
; NO-SINK-AFTER-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP8]], align 2, !alias.scope !39
; NO-SINK-AFTER-NEXT: [[TMP15:%.*]] = insertelement <4 x i16> poison, i16 [[TMP11]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i32 1
; NO-SINK-AFTER-NEXT: [[TMP17:%.*]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 2
; NO-SINK-AFTER-NEXT: [[TMP18]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i32 3
; NO-SINK-AFTER-NEXT: [[TMP19:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP18]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP20:%.*]] = sext <4 x i16> [[TMP19]] to <4 x i32>
; NO-SINK-AFTER-NEXT: [[TMP21:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32>
; NO-SINK-AFTER-NEXT: [[TMP22:%.*]] = mul nsw <4 x i32> [[TMP21]], [[TMP20]]
; NO-SINK-AFTER-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
; NO-SINK-AFTER-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>*
; NO-SINK-AFTER-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP25]], align 4, !alias.scope !40, !noalias !39
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP18]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP18]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; NO-SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; NO-SINK-AFTER: for.body:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
; NO-SINK-AFTER-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
; NO-SINK-AFTER-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4
; NO-SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; NO-SINK-AFTER-NEXT: [[TMP27]] = load i16, i16* [[CUR_INDEX]], align 2
; NO-SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP27]] to i32
; NO-SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; NO-SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; NO-SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; NO-SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; NO-SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; NO-SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; NO-SINK-AFTER: for.end:
; NO-SINK-AFTER-NEXT: ret void
;
entry:
%pre.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 0, i64 0
%.pre = load i16, i16* %pre.index
br label %for.body
for.body:
%0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arraycidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
%cur.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 %indvars.iv, i64 1
store i32 7, i32* %arraycidx ; 1st instruction, to be widened.
%conv = sext i16 %0 to i32 ; 2nd, cast to sink after third.
%1 = load i16, i16* %cur.index ; 3rd, first-order-recurring load not widened.
%conv3 = sext i16 %1 to i32
%mul = mul nsw i32 %conv3, %conv
%arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
store i32 %mul, i32* %arrayidx5
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; void no_sink_after(short *a, int n, int *b) {
; for(int i = 0; i < n; i++)
; b[i] = ((a[i] + 2) * a[i + 1]);
; }
;
;
define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) {
; CHECK-LABEL: @sink_after_with_multiple_users(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32*
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !43
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], <i32 2, i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4, !alias.scope !46, !noalias !43
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP14]] = load i16, i16* [[ARRAYIDX2]], align 2
; CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; UNROLL-LABEL: @sink_after_with_multiple_users(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL: vector.memcheck:
; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
; UNROLL-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; UNROLL-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; UNROLL-NEXT: [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32*
; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]]
; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]]
; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8
; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1
; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !43
; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i64 4
; UNROLL-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
; UNROLL-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2, !alias.scope !43
; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP10:%.*]] = sext <4 x i16> [[TMP8]] to <4 x i32>
; UNROLL-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[TMP9]] to <4 x i32>
; UNROLL-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP10]], <i32 2, i32 2, i32 2, i32 2>
; UNROLL-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[TMP11]], <i32 2, i32 2, i32 2, i32 2>
; UNROLL-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; UNROLL-NEXT: [[TMP15:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32>
; UNROLL-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP12]], [[TMP14]]
; UNROLL-NEXT: [[TMP17:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP15]]
; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; UNROLL-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP19]], align 4, !alias.scope !46, !noalias !43
; UNROLL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i64 4
; UNROLL-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP21]], align 4, !alias.scope !46, !noalias !43
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i64 3
; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL: for.body:
; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ]
; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; UNROLL-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NEXT: [[TMP23]] = load i16, i16* [[ARRAYIDX2]], align 2
; UNROLL-NEXT: [[CONV3:%.*]] = sext i16 [[TMP23]] to i32
; UNROLL-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
; UNROLL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
; UNROLL-NO-IC-LABEL: @sink_after_with_multiple_users(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL-NO-IC: vector.memcheck:
; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; UNROLL-NO-IC-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; UNROLL-NO-IC-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; UNROLL-NO-IC-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NO-IC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP2]], 1
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <4 x i16>*
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP8]], align 2, !alias.scope !43
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !43
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[TMP12]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 2, i32 2, i32 2, i32 2>
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 2, i32 2, i32 2, i32 2>
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP17]]
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = mul nsw <4 x i32> [[TMP16]], [[TMP18]]
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* [[TMP24]], align 4, !alias.scope !46, !noalias !43
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP26]], align 4, !alias.scope !46, !noalias !43
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; UNROLL-NO-IC-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-IC-NEXT: [[TMP28]] = load i16, i16* [[ARRAYIDX2]], align 2
; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP28]] to i32
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @sink_after_with_multiple_users(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; UNROLL-NO-VF: vector.memcheck:
; UNROLL-NO-VF-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; UNROLL-NO-VF-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; UNROLL-NO-VF-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; UNROLL-NO-VF-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; UNROLL-NO-VF-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; UNROLL-NO-VF-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
; UNROLL-NO-VF-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; UNROLL-NO-VF-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; UNROLL-NO-VF-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; UNROLL-NO-VF-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[INDUCTION7]], 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 2, !alias.scope !42
; UNROLL-NO-VF-NEXT: [[TMP6]] = load i16, i16* [[TMP4]], align 2, !alias.scope !42
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[TMP5]] to i32
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP7]], 2
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP8]], 2
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sext i16 [[TMP5]] to i32
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sext i16 [[TMP6]] to i32
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = mul nsw i32 [[TMP9]], [[TMP11]]
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]]
; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]]
; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]]
; UNROLL-NO-VF-NEXT: store i32 [[TMP13]], i32* [[TMP15]], align 4, !alias.scope !45, !noalias !42
; UNROLL-NO-VF-NEXT: store i32 [[TMP14]], i32* [[TMP16]], align 4, !alias.scope !45, !noalias !42
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; UNROLL-NO-VF-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-VF-NEXT: [[TMP18]] = load i16, i16* [[ARRAYIDX2]], align 2
; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP18]] to i32
; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @sink_after_with_multiple_users(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; SINK-AFTER: vector.memcheck:
; SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; SINK-AFTER-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; SINK-AFTER-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; SINK-AFTER-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; SINK-AFTER-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
; SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; SINK-AFTER-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
; SINK-AFTER-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !43
; SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], <i32 2, i32 2, i32 2, i32 2>
; SINK-AFTER-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP10:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP9]]
; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
; SINK-AFTER-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
; SINK-AFTER-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP13]], align 4, !alias.scope !46, !noalias !43
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; SINK-AFTER-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; SINK-AFTER-NEXT: [[TMP15]] = load i16, i16* [[ARRAYIDX2]], align 2
; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP15]] to i32
; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
; NO-SINK-AFTER-LABEL: @sink_after_with_multiple_users(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; NO-SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; NO-SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; NO-SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; NO-SINK-AFTER: vector.memcheck:
; NO-SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; NO-SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; NO-SINK-AFTER-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; NO-SINK-AFTER-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; NO-SINK-AFTER-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
; NO-SINK-AFTER-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
; NO-SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; NO-SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; NO-SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; NO-SINK-AFTER-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; NO-SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
; NO-SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !43
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], <i32 2, i32 2, i32 2, i32 2>
; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP9]]
; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
; NO-SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
; NO-SINK-AFTER-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP13]], align 4, !alias.scope !46, !noalias !43
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; NO-SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; NO-SINK-AFTER: for.body:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; NO-SINK-AFTER-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2
; NO-SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; NO-SINK-AFTER-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
; NO-SINK-AFTER-NEXT: [[TMP15]] = load i16, i16* [[ARRAYIDX2]], align 2
; NO-SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP15]] to i32
; NO-SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
; NO-SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; NO-SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
; NO-SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; NO-SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
; NO-SINK-AFTER: for.end:
; NO-SINK-AFTER-NEXT: ret void
;
entry:
%.pre = load i16, i16* %a
br label %for.body
for.body:
%0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%conv = sext i16 %0 to i32
%add = add nsw i32 %conv, 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%arrayidx2 = getelementptr inbounds i16, i16* %a, i64 %indvars.iv.next
%1 = load i16, i16* %arrayidx2
%conv3 = sext i16 %1 to i32
%mul = mul nsw i32 %add, %conv3
%arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
store i32 %mul, i32* %arrayidx5
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; Do not sink branches: While branches are if-converted and do not require
; sinking, instructions with side effects (e.g. loads) conditioned by those
; branches will become users of the condition bit after vectorization and would
; need to be sunk if the loop is vectorized.
define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unnamed_addr #0 {
; CHECK-LABEL: @do_not_sink_branch(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP530:%.*]] = icmp sgt i32 [[TC:%.*]], 0
; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
; CHECK: for.body4:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
; CHECK-NEXT: [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
; CHECK-NEXT: br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
; CHECK: cond.true:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[INDVARS_IV]] to i64
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; CHECK-NEXT: br label [[COND_END]]
; CHECK: cond.end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[INDVARS_IV]] to i64
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP1]]
; CHECK-NEXT: store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; CHECK-NEXT: [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
; CHECK: for.end12.loopexit:
; CHECK-NEXT: ret void
;
; UNROLL-LABEL: @do_not_sink_branch(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[CMP530:%.*]] = icmp sgt i32 [[TC:%.*]], 0
; UNROLL-NEXT: br label [[FOR_BODY4:%.*]]
; UNROLL: for.body4:
; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
; UNROLL-NEXT: [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
; UNROLL-NEXT: br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
; UNROLL: cond.true:
; UNROLL-NEXT: [[TMP0:%.*]] = zext i32 [[INDVARS_IV]] to i64
; UNROLL-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP0]]
; UNROLL-NEXT: [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; UNROLL-NEXT: br label [[COND_END]]
; UNROLL: cond.end:
; UNROLL-NEXT: [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
; UNROLL-NEXT: [[TMP1:%.*]] = zext i32 [[INDVARS_IV]] to i64
; UNROLL-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP1]]
; UNROLL-NEXT: store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
; UNROLL: for.end12.loopexit:
; UNROLL-NEXT: ret void
;
; UNROLL-NO-IC-LABEL: @do_not_sink_branch(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY4:%.*]]
; UNROLL-NO-IC: for.body4:
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
; UNROLL-NO-IC-NEXT: [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
; UNROLL-NO-IC-NEXT: br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
; UNROLL-NO-IC: cond.true:
; UNROLL-NO-IC-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; UNROLL-NO-IC-NEXT: br label [[COND_END]]
; UNROLL-NO-IC: cond.end:
; UNROLL-NO-IC-NEXT: [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
; UNROLL-NO-IC: for.end12.loopexit:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @do_not_sink_branch(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY4:%.*]]
; UNROLL-NO-VF: for.body4:
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
; UNROLL-NO-VF-NEXT: [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
; UNROLL-NO-VF-NEXT: br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
; UNROLL-NO-VF: cond.true:
; UNROLL-NO-VF-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; UNROLL-NO-VF-NEXT: br label [[COND_END]]
; UNROLL-NO-VF: cond.end:
; UNROLL-NO-VF-NEXT: [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
; UNROLL-NO-VF: for.end12.loopexit:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @do_not_sink_branch(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
; SINK-AFTER-NEXT: br label [[FOR_BODY4:%.*]]
; SINK-AFTER: for.body4:
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
; SINK-AFTER-NEXT: [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
; SINK-AFTER-NEXT: br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
; SINK-AFTER: cond.true:
; SINK-AFTER-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
; SINK-AFTER-NEXT: [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; SINK-AFTER-NEXT: br label [[COND_END]]
; SINK-AFTER: cond.end:
; SINK-AFTER-NEXT: [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
; SINK-AFTER-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
; SINK-AFTER-NEXT: store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
; SINK-AFTER: for.end12.loopexit:
; SINK-AFTER-NEXT: ret void
;
; NO-SINK-AFTER-LABEL: @do_not_sink_branch(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
; NO-SINK-AFTER-NEXT: br label [[FOR_BODY4:%.*]]
; NO-SINK-AFTER: for.body4:
; NO-SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
; NO-SINK-AFTER-NEXT: [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
; NO-SINK-AFTER-NEXT: br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
; NO-SINK-AFTER: cond.true:
; NO-SINK-AFTER-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
; NO-SINK-AFTER-NEXT: [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; NO-SINK-AFTER-NEXT: br label [[COND_END]]
; NO-SINK-AFTER: cond.end:
; NO-SINK-AFTER-NEXT: [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
; NO-SINK-AFTER-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
; NO-SINK-AFTER-NEXT: store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
; NO-SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; NO-SINK-AFTER-NEXT: [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
; NO-SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
; NO-SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
; NO-SINK-AFTER: for.end12.loopexit:
; NO-SINK-AFTER-NEXT: ret void
;
entry:
%cmp530 = icmp slt i32 0, %tc
br label %for.body4
for.body4: ; preds = %cond.end, %entry
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
%cmp534 = phi i1 [ %cmp530, %entry ], [ %cmp5, %cond.end ]
br i1 %cmp534, label %cond.true, label %cond.end
cond.true: ; preds = %for.body4
%arrayidx7 = getelementptr inbounds i32, i32* %in, i32 %indvars.iv
%in.val = load i32, i32* %arrayidx7, align 4
br label %cond.end
cond.end: ; preds = %for.body4, %cond.true
%cond = phi i32 [ %in.val, %cond.true ], [ 0, %for.body4 ]
%arrayidx8 = getelementptr inbounds i32, i32* %out, i32 %indvars.iv
store i32 %cond, i32* %arrayidx8, align 4
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
%cmp5 = icmp slt i32 %indvars.iv.next, %tc
%exitcond = icmp eq i32 %indvars.iv.next, %x
br i1 %exitcond, label %for.end12.loopexit, label %for.body4
for.end12.loopexit: ; preds = %cond.end
ret void
}
; Dead instructions, like the exit condition are not part of the actual VPlan
; and do not need to be sunk. PR44634.
define void @sink_dead_inst() {
;
; CHECK-LABEL: @sink_dead_inst(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i16 [ -24, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP1]] = add i16 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP3:%.*]] = or i16 [[TMP0]], 1
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = zext i16 [[TMP3]] to i32
; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ -27, [[ENTRY]] ], [ 13, [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; CHECK-NEXT: [[SCALAR_RECUR5:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR5]], 15
; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; CHECK-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; UNROLL-LABEL: @sink_dead_inst(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[TMP0:%.*]] = phi i16 [ -24, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NEXT: [[TMP1]] = add i16 [[TMP0]], 8
; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[TMP3:%.*]] = or i16 [[TMP0]], 5
; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = zext i16 [[TMP3]] to i32
; UNROLL-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ -27, [[ENTRY]] ], [ 13, [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: br label [[FOR_COND:%.*]]
; UNROLL: for.cond:
; UNROLL-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; UNROLL-NEXT: [[SCALAR_RECUR6:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
; UNROLL-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR6]], 15
; UNROLL-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; UNROLL-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
; UNROLL-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
; UNROLL-NO-IC-LABEL: @sink_dead_inst(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], <i16 1, i16 1, i16 1, i16 1>
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP3]] = zext <4 x i16> [[TMP1]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR2]], <4 x i32> [[TMP2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP0]], <i16 5, i16 5, i16 5, i16 5>
; UNROLL-NO-IC-NEXT: [[TMP7]] = add <4 x i16> [[TMP1]], <i16 5, i16 5, i16 5, i16 5>
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP6]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[TMP8]], <i16 10, i16 10, i16 10, i16 10>
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sub <4 x i16> [[TMP9]], <i16 10, i16 10, i16 10, i16 10>
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 43, 40
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP7]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP7]], i32 2
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_COND:%.*]]
; UNROLL-NO-IC: for.cond:
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR6:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
; UNROLL-NO-IC-NEXT: [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10
; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR6]], 15
; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; UNROLL-NO-IC-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
; UNROLL-NO-IC-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @sink_dead_inst(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR2:%.*]] = phi i32 [ -27, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[TMP0]]
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i16 [[OFFSET_IDX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[INDUCTION]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[INDUCTION1]], 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32
; UNROLL-NO-VF-NEXT: [[TMP4]] = zext i16 [[TMP2]] to i32
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add i16 [[TMP1]], 5
; UNROLL-NO-VF-NEXT: [[TMP6]] = add i16 [[TMP2]], 5
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sub i16 [[VECTOR_RECUR]], 10
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sub i16 [[TMP5]], 10
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42
; UNROLL-NO-VF-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 43, 42
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 15, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_COND:%.*]]
; UNROLL-NO-VF: for.cond:
; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
; UNROLL-NO-VF-NEXT: [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10
; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR4]], 15
; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; UNROLL-NO-VF-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
; UNROLL-NO-VF-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @sink_dead_inst(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
; SINK-AFTER-NEXT: [[TMP1]] = zext <4 x i16> [[TMP0]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP3]] = add <4 x i16> [[TMP0]], <i16 5, i16 5, i16 5, i16 5>
; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], <i16 10, i16 10, i16 10, i16 10>
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
; SINK-AFTER-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 43, 40
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[FOR_COND:%.*]]
; SINK-AFTER: for.cond:
; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR5:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
; SINK-AFTER-NEXT: [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10
; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR5]], 15
; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; SINK-AFTER-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
; SINK-AFTER-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
; NO-SINK-AFTER-LABEL: @sink_dead_inst(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
; NO-SINK-AFTER-NEXT: [[TMP1]] = zext <4 x i16> [[TMP0]] to <4 x i32>
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP3]] = add <4 x i16> [[TMP0]], <i16 5, i16 5, i16 5, i16 5>
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], <i16 10, i16 10, i16 10, i16 10>
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
; NO-SINK-AFTER-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 43, 40
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
; NO-SINK-AFTER-NEXT: br label [[FOR_COND:%.*]]
; NO-SINK-AFTER: for.cond:
; NO-SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR5:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
; NO-SINK-AFTER-NEXT: [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10
; NO-SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR5]], 15
; NO-SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; NO-SINK-AFTER-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
; NO-SINK-AFTER-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; NO-SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
; NO-SINK-AFTER: for.end:
; NO-SINK-AFTER-NEXT: ret void
;
entry:
br label %for.cond
for.cond:
%iv = phi i16 [ -27, %entry ], [ %iv.next, %for.cond ]
%rec.1 = phi i16 [ 0, %entry ], [ %rec.1.prev, %for.cond ]
%rec.2 = phi i32 [ -27, %entry ], [ %rec.2.prev, %for.cond ]
%use.rec.1 = sub i16 %rec.1, 10
%cmp = icmp eq i32 %rec.2, 15
%iv.next = add i16 %iv, 1
%rec.2.prev = zext i16 %iv.next to i32
%rec.1.prev = add i16 %iv.next, 5
br i1 %cmp, label %for.end, label %for.cond
for.end:
ret void
}
define i32 @sink_into_replication_region(i32 %y) {
;
; CHECK-LABEL: @sink_into_replication_region(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; CHECK: pred.udiv.if:
; CHECK-NEXT: [[TMP4:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE]]
; CHECK: pred.udiv.continue:
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
; CHECK: pred.udiv.if3:
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP9:%.*]] = udiv i32 219220132, [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i64 1
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE4]]
; CHECK: pred.udiv.continue4:
; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF3]] ]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
; CHECK: pred.udiv.if5:
; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], -2
; CHECK-NEXT: [[TMP14:%.*]] = udiv i32 219220132, [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i64 2
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE6]]
; CHECK: pred.udiv.continue6:
; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP15]], [[PRED_UDIV_IF5]] ]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]]
; CHECK: pred.udiv.if7:
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], -3
; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP19]], i64 3
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE8]]
; CHECK: pred.udiv.continue8:
; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP20]], [[PRED_UDIV_IF7]] ]
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI]], [[TMP22]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP23]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[VAR]]
; CHECK: bb2:
; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]]
;
; UNROLL-LABEL: @sink_into_replication_region(
; UNROLL-NEXT: bb:
; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; UNROLL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE19:%.*]] ]
; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_UDIV_CONTINUE19]] ]
; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_UDIV_CONTINUE19]] ]
; UNROLL-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_UDIV_CONTINUE19]] ]
; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; UNROLL-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
; UNROLL-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NEXT: [[VEC_IV:%.*]] = or <4 x i32> [[BROADCAST_SPLAT4]], <i32 0, i32 1, i32 2, i32 3>
; UNROLL-NEXT: [[VEC_IV5:%.*]] = or <4 x i32> [[BROADCAST_SPLAT4]], <i32 4, i32 5, i32 6, i32 7>
; UNROLL-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
; UNROLL-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV5]], [[BROADCAST_SPLAT]]
; UNROLL-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
; UNROLL-NEXT: br i1 [[TMP4]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; UNROLL: pred.udiv.if:
; UNROLL-NEXT: [[TMP5:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
; UNROLL-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE]]
; UNROLL: pred.udiv.continue:
; UNROLL-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
; UNROLL-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; UNROLL-NEXT: br i1 [[TMP8]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
; UNROLL: pred.udiv.if6:
; UNROLL-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NEXT: [[TMP10:%.*]] = udiv i32 219220132, [[TMP9]]
; UNROLL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP10]], i64 1
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE7]]
; UNROLL: pred.udiv.continue7:
; UNROLL-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP11]], [[PRED_UDIV_IF6]] ]
; UNROLL-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2
; UNROLL-NEXT: br i1 [[TMP13]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
; UNROLL: pred.udiv.if8:
; UNROLL-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], -2
; UNROLL-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP14]]
; UNROLL-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i64 2
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE9]]
; UNROLL: pred.udiv.continue9:
; UNROLL-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP16]], [[PRED_UDIV_IF8]] ]
; UNROLL-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3
; UNROLL-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]]
; UNROLL: pred.udiv.if10:
; UNROLL-NEXT: [[TMP19:%.*]] = add i32 [[OFFSET_IDX]], -3
; UNROLL-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP19]]
; UNROLL-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i64 3
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE11]]
; UNROLL: pred.udiv.continue11:
; UNROLL-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP21]], [[PRED_UDIV_IF10]] ]
; UNROLL-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
; UNROLL-NEXT: br i1 [[TMP23]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
; UNROLL: pred.udiv.if12:
; UNROLL-NEXT: [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], -4
; UNROLL-NEXT: [[TMP25:%.*]] = udiv i32 219220132, [[TMP24]]
; UNROLL-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> poison, i32 [[TMP25]], i64 0
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE13]]
; UNROLL: pred.udiv.continue13:
; UNROLL-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE11]] ], [ [[TMP26]], [[PRED_UDIV_IF12]] ]
; UNROLL-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
; UNROLL-NEXT: br i1 [[TMP28]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
; UNROLL: pred.udiv.if14:
; UNROLL-NEXT: [[TMP29:%.*]] = add i32 [[OFFSET_IDX]], -5
; UNROLL-NEXT: [[TMP30:%.*]] = udiv i32 219220132, [[TMP29]]
; UNROLL-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> [[TMP27]], i32 [[TMP30]], i64 1
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE15]]
; UNROLL: pred.udiv.continue15:
; UNROLL-NEXT: [[TMP32:%.*]] = phi <4 x i32> [ [[TMP27]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP31]], [[PRED_UDIV_IF14]] ]
; UNROLL-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
; UNROLL-NEXT: br i1 [[TMP33]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]]
; UNROLL: pred.udiv.if16:
; UNROLL-NEXT: [[TMP34:%.*]] = add i32 [[OFFSET_IDX]], -6
; UNROLL-NEXT: [[TMP35:%.*]] = udiv i32 219220132, [[TMP34]]
; UNROLL-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP35]], i64 2
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE17]]
; UNROLL: pred.udiv.continue17:
; UNROLL-NEXT: [[TMP37:%.*]] = phi <4 x i32> [ [[TMP32]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP36]], [[PRED_UDIV_IF16]] ]
; UNROLL-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
; UNROLL-NEXT: br i1 [[TMP38]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19]]
; UNROLL: pred.udiv.if18:
; UNROLL-NEXT: [[TMP39:%.*]] = add i32 [[OFFSET_IDX]], -7
; UNROLL-NEXT: [[TMP40:%.*]] = udiv i32 219220132, [[TMP39]]
; UNROLL-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP40]], i64 3
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE19]]
; UNROLL: pred.udiv.continue19:
; UNROLL-NEXT: [[TMP42]] = phi <4 x i32> [ [[TMP37]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP41]], [[PRED_UDIV_IF18]] ]
; UNROLL-NEXT: [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP22]], <4 x i32> [[TMP42]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP45]] = add <4 x i32> [[VEC_PHI]], [[TMP43]]
; UNROLL-NEXT: [[TMP46]] = add <4 x i32> [[VEC_PHI2]], [[TMP44]]
; UNROLL-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; UNROLL-NEXT: [[TMP47:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI2]]
; UNROLL-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP48]], [[TMP49]]
; UNROLL-NEXT: [[TMP50:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; UNROLL-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: br label [[BB2:%.*]]
; UNROLL: bb1:
; UNROLL-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: ret i32 [[VAR]]
; UNROLL: bb2:
; UNROLL-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]]
;
; UNROLL-NO-IC-LABEL: @sink_into_replication_region(
; UNROLL-NO-IC-NEXT: bb:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-IC-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE19:%.*]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE19]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_UDIV_CONTINUE19]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_UDIV_CONTINUE19]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_UDIV_CONTINUE19]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT4]], <i32 0, i32 1, i32 2, i32 3>
; UNROLL-NO-IC-NEXT: [[VEC_IV5:%.*]] = add <4 x i32> [[BROADCAST_SPLAT4]], <i32 4, i32 5, i32 6, i32 7>
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV5]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; UNROLL-NO-IC: pred.udiv.if:
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = udiv i32 219220132, [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE]]
; UNROLL-NO-IC: pred.udiv.continue:
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UDIV_IF]] ]
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP9]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
; UNROLL-NO-IC: pred.udiv.if6:
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = udiv i32 219220132, [[TMP10]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE7]]
; UNROLL-NO-IC: pred.udiv.continue7:
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF6]] ]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP14]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
; UNROLL-NO-IC: pred.udiv.if8:
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], -2
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = udiv i32 219220132, [[TMP15]]
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE9]]
; UNROLL-NO-IC: pred.udiv.continue9:
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP17]], [[PRED_UDIV_IF8]] ]
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]]
; UNROLL-NO-IC: pred.udiv.if10:
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], -3
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = udiv i32 219220132, [[TMP20]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE11]]
; UNROLL-NO-IC: pred.udiv.continue11:
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP22]], [[PRED_UDIV_IF10]] ]
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP24]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
; UNROLL-NO-IC: pred.udiv.if12:
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], -4
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = udiv i32 219220132, [[TMP25]]
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE13]]
; UNROLL-NO-IC: pred.udiv.continue13:
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE11]] ], [ [[TMP27]], [[PRED_UDIV_IF12]] ]
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP29]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
; UNROLL-NO-IC: pred.udiv.if14:
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], -5
; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = udiv i32 219220132, [[TMP30]]
; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE15]]
; UNROLL-NO-IC: pred.udiv.continue15:
; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP32]], [[PRED_UDIV_IF14]] ]
; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP34]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]]
; UNROLL-NO-IC: pred.udiv.if16:
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = add i32 [[OFFSET_IDX]], -6
; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = udiv i32 219220132, [[TMP35]]
; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE17]]
; UNROLL-NO-IC: pred.udiv.continue17:
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP37]], [[PRED_UDIV_IF16]] ]
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP39]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19]]
; UNROLL-NO-IC: pred.udiv.if18:
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = add i32 [[OFFSET_IDX]], -7
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = udiv i32 219220132, [[TMP40]]
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE19]]
; UNROLL-NO-IC: pred.udiv.continue19:
; UNROLL-NO-IC-NEXT: [[TMP43]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP42]], [[PRED_UDIV_IF18]] ]
; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP23]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> [[TMP43]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP46]] = add <4 x i32> [[VEC_PHI]], [[TMP44]]
; UNROLL-NO-IC-NEXT: [[TMP47]] = add <4 x i32> [[VEC_PHI2]], [[TMP45]]
; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI2]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 -4, i32 -4, i32 -4, i32 -4>
; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]]
; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP43]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP43]], i32 2
; UNROLL-NO-IC-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-IC: bb1:
; UNROLL-NO-IC-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[VAR]]
; UNROLL-NO-IC: bb2:
; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]]
;
; UNROLL-NO-VF-LABEL: @sink_into_replication_region(
; UNROLL-NO-VF-NEXT: bb:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-VF-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE5:%.*]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_UDIV_CONTINUE5]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_UDIV_CONTINUE5]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_UDIV_CONTINUE5]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; UNROLL-NO-VF-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[VEC_IV3:%.*]] = add i32 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]]
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV3]], [[TRIP_COUNT_MINUS_1]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; UNROLL-NO-VF: pred.udiv.if:
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = udiv i32 219220132, [[INDUCTION]]
; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE]]
; UNROLL-NO-VF: pred.udiv.continue:
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5]]
; UNROLL-NO-VF: pred.udiv.if4:
; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION1]]
; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE5]]
; UNROLL-NO-VF: pred.udiv.continue5:
; UNROLL-NO-VF-NEXT: [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF4]] ]
; UNROLL-NO-VF-NEXT: [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
; UNROLL-NO-VF-NEXT: [[TMP9]] = add i32 [[VEC_PHI2]], [[TMP5]]
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]]
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI2]]
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF51:![0-9]+]], !llvm.loop [[LOOP52:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP10]]
; UNROLL-NO-VF-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-VF: bb1:
; UNROLL-NO-VF-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[VAR]]
; UNROLL-NO-VF: bb2:
; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF53:![0-9]+]], !llvm.loop [[LOOP54:![0-9]+]]
;
; SINK-AFTER-LABEL: @sink_into_replication_region(
; SINK-AFTER-NEXT: bb:
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; SINK-AFTER-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; SINK-AFTER-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
; SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
; SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ]
; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3>
; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
; SINK-AFTER-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; SINK-AFTER-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; SINK-AFTER: pred.udiv.if:
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP5:%.*]] = udiv i32 219220132, [[TMP4]]
; SINK-AFTER-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE]]
; SINK-AFTER: pred.udiv.continue:
; SINK-AFTER-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
; SINK-AFTER-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
; SINK-AFTER-NEXT: br i1 [[TMP8]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
; SINK-AFTER: pred.udiv.if3:
; SINK-AFTER-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -1
; SINK-AFTER-NEXT: [[TMP10:%.*]] = udiv i32 219220132, [[TMP9]]
; SINK-AFTER-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP10]], i32 1
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE4]]
; SINK-AFTER: pred.udiv.continue4:
; SINK-AFTER-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP11]], [[PRED_UDIV_IF3]] ]
; SINK-AFTER-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
; SINK-AFTER: pred.udiv.if5:
; SINK-AFTER-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], -2
; SINK-AFTER-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP14]]
; SINK-AFTER-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i32 2
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE6]]
; SINK-AFTER: pred.udiv.continue6:
; SINK-AFTER-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP16]], [[PRED_UDIV_IF5]] ]
; SINK-AFTER-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
; SINK-AFTER-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]]
; SINK-AFTER: pred.udiv.if7:
; SINK-AFTER-NEXT: [[TMP19:%.*]] = add i32 [[OFFSET_IDX]], -3
; SINK-AFTER-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP19]]
; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i32 3
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE8]]
; SINK-AFTER: pred.udiv.continue8:
; SINK-AFTER-NEXT: [[TMP22]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP21]], [[PRED_UDIV_IF7]] ]
; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]]
; SINK-AFTER-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
; SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
; SINK-AFTER-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: br label [[BB2:%.*]]
; SINK-AFTER: bb1:
; SINK-AFTER-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: ret i32 [[VAR]]
; SINK-AFTER: bb2:
; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]]
;
; NO-SINK-AFTER-LABEL: @sink_into_replication_region(
; NO-SINK-AFTER-NEXT: bb:
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; NO-SINK-AFTER-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; NO-SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
; NO-SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; NO-SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; NO-SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; NO-SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; NO-SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; NO-SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
; NO-SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; NO-SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ]
; NO-SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; NO-SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; NO-SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; NO-SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; NO-SINK-AFTER-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; NO-SINK-AFTER-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3>
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; NO-SINK-AFTER-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; NO-SINK-AFTER: pred.udiv.if:
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = udiv i32 219220132, [[TMP4]]
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
; NO-SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE]]
; NO-SINK-AFTER: pred.udiv.continue:
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
; NO-SINK-AFTER-NEXT: br i1 [[TMP8]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
; NO-SINK-AFTER: pred.udiv.if3:
; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -1
; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = udiv i32 219220132, [[TMP9]]
; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP10]], i32 1
; NO-SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE4]]
; NO-SINK-AFTER: pred.udiv.continue4:
; NO-SINK-AFTER-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP11]], [[PRED_UDIV_IF3]] ]
; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[TMP13]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
; NO-SINK-AFTER: pred.udiv.if5:
; NO-SINK-AFTER-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], -2
; NO-SINK-AFTER-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP14]]
; NO-SINK-AFTER-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i32 2
; NO-SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE6]]
; NO-SINK-AFTER: pred.udiv.continue6:
; NO-SINK-AFTER-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP16]], [[PRED_UDIV_IF5]] ]
; NO-SINK-AFTER-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
; NO-SINK-AFTER-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]]
; NO-SINK-AFTER: pred.udiv.if7:
; NO-SINK-AFTER-NEXT: [[TMP19:%.*]] = add i32 [[OFFSET_IDX]], -3
; NO-SINK-AFTER-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP19]]
; NO-SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i32 3
; NO-SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE8]]
; NO-SINK-AFTER: pred.udiv.continue8:
; NO-SINK-AFTER-NEXT: [[TMP22]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP21]], [[PRED_UDIV_IF7]] ]
; NO-SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]]
; NO-SINK-AFTER-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
; NO-SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
; NO-SINK-AFTER-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
; NO-SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: br label [[BB2:%.*]]
; NO-SINK-AFTER: bb1:
; NO-SINK-AFTER-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: ret i32 [[VAR]]
; NO-SINK-AFTER: bb2:
; NO-SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; NO-SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; NO-SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
; NO-SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; NO-SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; NO-SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; NO-SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]]
;
bb:
br label %bb2
bb1: ; preds = %bb2
%var = phi i32 [ %var6, %bb2 ]
ret i32 %var
bb2: ; preds = %bb2, %bb
%var3 = phi i32 [ %var8, %bb2 ], [ %y, %bb ]
%var4 = phi i32 [ %var7, %bb2 ], [ 0, %bb ]
%var5 = phi i32 [ %var6, %bb2 ], [ 0, %bb ]
%var6 = add i32 %var5, %var4
%var7 = udiv i32 219220132, %var3
%var8 = add nsw i32 %var3, -1
%var9 = icmp slt i32 %var3, 2
br i1 %var9, label %bb1, label %bb2, !prof !2
}
define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
;
; CHECK-LABEL: @sink_into_replication_region_multiple(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[PRED_STORE_CONTINUE15]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[PRED_STORE_CONTINUE15]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_STORE_CONTINUE15]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -2
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -3
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; CHECK: pred.udiv.if:
; CHECK-NEXT: [[TMP7:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE]]
; CHECK: pred.udiv.continue:
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
; CHECK: pred.udiv.if4:
; CHECK-NEXT: [[TMP11:%.*]] = udiv i32 219220132, [[TMP2]]
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP11]], i64 1
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE5]]
; CHECK: pred.udiv.continue5:
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF4]] ]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
; CHECK: pred.udiv.if6:
; CHECK-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP3]]
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP15]], i64 2
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE7]]
; CHECK: pred.udiv.continue7:
; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP16]], [[PRED_UDIV_IF6]] ]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
; CHECK: pred.udiv.if8:
; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP4]]
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP19]], i64 3
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE9]]
; CHECK: pred.udiv.continue9:
; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP20]], [[PRED_UDIV_IF8]] ]
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI]], [[TMP22]]
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0
; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP25:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[TMP25]]
; CHECK-NEXT: store i32 [[OFFSET_IDX]], i32* [[TMP26]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
; CHECK: pred.store.if10:
; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP29]]
; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP30]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE11]]
; CHECK: pred.store.continue11:
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
; CHECK: pred.store.if12:
; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP33]]
; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP34]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE13]]
; CHECK: pred.store.continue13:
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
; CHECK: pred.store.if14:
; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[INDEX]], 3
; CHECK-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP37]]
; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP38]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE15]]
; CHECK: pred.store.continue15:
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP23]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP40]])
; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[VAR]]
; CHECK: bb2:
; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]]
;
; UNROLL-LABEL: @sink_into_replication_region_multiple(
; UNROLL-NEXT: bb:
; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; UNROLL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE35:%.*]] ]
; UNROLL-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_STORE_CONTINUE35]] ]
; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_STORE_CONTINUE35]] ]
; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_STORE_CONTINUE35]] ]
; UNROLL-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE35]] ]
; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; UNROLL-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -2
; UNROLL-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -3
; UNROLL-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -4
; UNROLL-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], -5
; UNROLL-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -6
; UNROLL-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -7
; UNROLL-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], <i32 4, i32 4, i32 4, i32 4>
; UNROLL-NEXT: [[TMP9:%.*]] = icmp ule <4 x i32> [[VEC_IND3]], [[BROADCAST_SPLAT]]
; UNROLL-NEXT: [[TMP10:%.*]] = icmp ule <4 x i32> [[STEP_ADD4]], [[BROADCAST_SPLAT]]
; UNROLL-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP9]], i64 0
; UNROLL-NEXT: br i1 [[TMP11]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; UNROLL: pred.udiv.if:
; UNROLL-NEXT: [[TMP12:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
; UNROLL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> poison, i32 [[TMP12]], i64 0
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE]]
; UNROLL: pred.udiv.continue:
; UNROLL-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP13]], [[PRED_UDIV_IF]] ]
; UNROLL-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP9]], i64 1
; UNROLL-NEXT: br i1 [[TMP15]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
; UNROLL: pred.udiv.if8:
; UNROLL-NEXT: [[TMP16:%.*]] = udiv i32 219220132, [[TMP2]]
; UNROLL-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP16]], i64 1
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE9]]
; UNROLL: pred.udiv.continue9:
; UNROLL-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP17]], [[PRED_UDIV_IF8]] ]
; UNROLL-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP9]], i64 2
; UNROLL-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]]
; UNROLL: pred.udiv.if10:
; UNROLL-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP3]]
; UNROLL-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP20]], i64 2
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE11]]
; UNROLL: pred.udiv.continue11:
; UNROLL-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP21]], [[PRED_UDIV_IF10]] ]
; UNROLL-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP9]], i64 3
; UNROLL-NEXT: br i1 [[TMP23]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
; UNROLL: pred.udiv.if12:
; UNROLL-NEXT: [[TMP24:%.*]] = udiv i32 219220132, [[TMP4]]
; UNROLL-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP24]], i64 3
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE13]]
; UNROLL: pred.udiv.continue13:
; UNROLL-NEXT: [[TMP26:%.*]] = phi <4 x i32> [ [[TMP22]], [[PRED_UDIV_CONTINUE11]] ], [ [[TMP25]], [[PRED_UDIV_IF12]] ]
; UNROLL-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP10]], i64 0
; UNROLL-NEXT: br i1 [[TMP27]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
; UNROLL: pred.udiv.if14:
; UNROLL-NEXT: [[TMP28:%.*]] = udiv i32 219220132, [[TMP5]]
; UNROLL-NEXT: [[TMP29:%.*]] = insertelement <4 x i32> poison, i32 [[TMP28]], i64 0
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE15]]
; UNROLL: pred.udiv.continue15:
; UNROLL-NEXT: [[TMP30:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE13]] ], [ [[TMP29]], [[PRED_UDIV_IF14]] ]
; UNROLL-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP10]], i64 1
; UNROLL-NEXT: br i1 [[TMP31]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]]
; UNROLL: pred.udiv.if16:
; UNROLL-NEXT: [[TMP32:%.*]] = udiv i32 219220132, [[TMP6]]
; UNROLL-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP32]], i64 1
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE17]]
; UNROLL: pred.udiv.continue17:
; UNROLL-NEXT: [[TMP34:%.*]] = phi <4 x i32> [ [[TMP30]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP33]], [[PRED_UDIV_IF16]] ]
; UNROLL-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP10]], i64 2
; UNROLL-NEXT: br i1 [[TMP35]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19:%.*]]
; UNROLL: pred.udiv.if18:
; UNROLL-NEXT: [[TMP36:%.*]] = udiv i32 219220132, [[TMP7]]
; UNROLL-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP36]], i64 2
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE19]]
; UNROLL: pred.udiv.continue19:
; UNROLL-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP34]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP37]], [[PRED_UDIV_IF18]] ]
; UNROLL-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP10]], i64 3
; UNROLL-NEXT: br i1 [[TMP39]], label [[PRED_UDIV_IF20:%.*]], label [[PRED_UDIV_CONTINUE21:%.*]]
; UNROLL: pred.udiv.if20:
; UNROLL-NEXT: [[TMP40:%.*]] = udiv i32 219220132, [[TMP8]]
; UNROLL-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP40]], i64 3
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE21]]
; UNROLL: pred.udiv.continue21:
; UNROLL-NEXT: [[TMP42]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE19]] ], [ [[TMP41]], [[PRED_UDIV_IF20]] ]
; UNROLL-NEXT: [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP26]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP26]], <4 x i32> [[TMP42]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NEXT: [[TMP45]] = add <4 x i32> [[VEC_PHI]], [[TMP43]]
; UNROLL-NEXT: [[TMP46]] = add <4 x i32> [[VEC_PHI7]], [[TMP44]]
; UNROLL-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[TMP9]], i64 0
; UNROLL-NEXT: br i1 [[TMP47]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL: pred.store.if:
; UNROLL-NEXT: [[TMP48:%.*]] = sext i32 [[INDEX]] to i64
; UNROLL-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[TMP48]]
; UNROLL-NEXT: store i32 [[OFFSET_IDX]], i32* [[TMP49]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL: pred.store.continue:
; UNROLL-NEXT: [[TMP50:%.*]] = extractelement <4 x i1> [[TMP9]], i64 1
; UNROLL-NEXT: br i1 [[TMP50]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
; UNROLL: pred.store.if22:
; UNROLL-NEXT: [[TMP51:%.*]] = or i32 [[INDEX]], 1
; UNROLL-NEXT: [[TMP52:%.*]] = sext i32 [[TMP51]] to i64
; UNROLL-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP52]]
; UNROLL-NEXT: store i32 [[TMP2]], i32* [[TMP53]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE23]]
; UNROLL: pred.store.continue23:
; UNROLL-NEXT: [[TMP54:%.*]] = extractelement <4 x i1> [[TMP9]], i64 2
; UNROLL-NEXT: br i1 [[TMP54]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
; UNROLL: pred.store.if24:
; UNROLL-NEXT: [[TMP55:%.*]] = or i32 [[INDEX]], 2
; UNROLL-NEXT: [[TMP56:%.*]] = sext i32 [[TMP55]] to i64
; UNROLL-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP56]]
; UNROLL-NEXT: store i32 [[TMP3]], i32* [[TMP57]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE25]]
; UNROLL: pred.store.continue25:
; UNROLL-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP9]], i64 3
; UNROLL-NEXT: br i1 [[TMP58]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]]
; UNROLL: pred.store.if26:
; UNROLL-NEXT: [[TMP59:%.*]] = or i32 [[INDEX]], 3
; UNROLL-NEXT: [[TMP60:%.*]] = sext i32 [[TMP59]] to i64
; UNROLL-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP60]]
; UNROLL-NEXT: store i32 [[TMP4]], i32* [[TMP61]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE27]]
; UNROLL: pred.store.continue27:
; UNROLL-NEXT: [[TMP62:%.*]] = extractelement <4 x i1> [[TMP10]], i64 0
; UNROLL-NEXT: br i1 [[TMP62]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]]
; UNROLL: pred.store.if28:
; UNROLL-NEXT: [[TMP63:%.*]] = or i32 [[INDEX]], 4
; UNROLL-NEXT: [[TMP64:%.*]] = sext i32 [[TMP63]] to i64
; UNROLL-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP64]]
; UNROLL-NEXT: store i32 [[TMP5]], i32* [[TMP65]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE29]]
; UNROLL: pred.store.continue29:
; UNROLL-NEXT: [[TMP66:%.*]] = extractelement <4 x i1> [[TMP10]], i64 1
; UNROLL-NEXT: br i1 [[TMP66]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]]
; UNROLL: pred.store.if30:
; UNROLL-NEXT: [[TMP67:%.*]] = or i32 [[INDEX]], 5
; UNROLL-NEXT: [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
; UNROLL-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP68]]
; UNROLL-NEXT: store i32 [[TMP6]], i32* [[TMP69]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE31]]
; UNROLL: pred.store.continue31:
; UNROLL-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP10]], i64 2
; UNROLL-NEXT: br i1 [[TMP70]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]]
; UNROLL: pred.store.if32:
; UNROLL-NEXT: [[TMP71:%.*]] = or i32 [[INDEX]], 6
; UNROLL-NEXT: [[TMP72:%.*]] = sext i32 [[TMP71]] to i64
; UNROLL-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP72]]
; UNROLL-NEXT: store i32 [[TMP7]], i32* [[TMP73]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE33]]
; UNROLL: pred.store.continue33:
; UNROLL-NEXT: [[TMP74:%.*]] = extractelement <4 x i1> [[TMP10]], i64 3
; UNROLL-NEXT: br i1 [[TMP74]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35]]
; UNROLL: pred.store.if34:
; UNROLL-NEXT: [[TMP75:%.*]] = or i32 [[INDEX]], 7
; UNROLL-NEXT: [[TMP76:%.*]] = sext i32 [[TMP75]] to i64
; UNROLL-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP76]]
; UNROLL-NEXT: store i32 [[TMP8]], i32* [[TMP77]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE35]]
; UNROLL: pred.store.continue35:
; UNROLL-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND3]], <i32 8, i32 8, i32 8, i32 8>
; UNROLL-NEXT: [[TMP78:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP78]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[TMP79:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI7]]
; UNROLL-NEXT: [[TMP80:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP79]], [[TMP80]]
; UNROLL-NEXT: [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; UNROLL-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: br label [[BB2:%.*]]
; UNROLL: bb1:
; UNROLL-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: ret i32 [[VAR]]
; UNROLL: bb2:
; UNROLL-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]]
;
; UNROLL-NO-IC-LABEL: @sink_into_replication_region_multiple(
; UNROLL-NO-IC-NEXT: bb:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-IC-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE35:%.*]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE35]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_STORE_CONTINUE35]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_STORE_CONTINUE35]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE35]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE35]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], -4
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -5
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -6
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -7
; UNROLL-NO-IC-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], <i32 4, i32 4, i32 4, i32 4>
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp ule <4 x i32> [[VEC_IND3]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ule <4 x i32> [[STEP_ADD4]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; UNROLL-NO-IC: pred.udiv.if:
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = udiv i32 219220132, [[TMP2]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i32 0
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE]]
; UNROLL-NO-IC: pred.udiv.continue:
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_UDIV_IF]] ]
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
; UNROLL-NO-IC: pred.udiv.if8:
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = udiv i32 219220132, [[TMP3]]
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP17]], i32 1
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE9]]
; UNROLL-NO-IC: pred.udiv.continue9:
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP15]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP18]], [[PRED_UDIV_IF8]] ]
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP20]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]]
; UNROLL-NO-IC: pred.udiv.if10:
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = udiv i32 219220132, [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP21]], i32 2
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE11]]
; UNROLL-NO-IC: pred.udiv.continue11:
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP22]], [[PRED_UDIV_IF10]] ]
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP24]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
; UNROLL-NO-IC: pred.udiv.if12:
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = udiv i32 219220132, [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP23]], i32 [[TMP25]], i32 3
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE13]]
; UNROLL-NO-IC: pred.udiv.continue13:
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ [[TMP23]], [[PRED_UDIV_CONTINUE11]] ], [ [[TMP26]], [[PRED_UDIV_IF12]] ]
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP28]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
; UNROLL-NO-IC: pred.udiv.if14:
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = udiv i32 219220132, [[TMP6]]
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP29]], i32 0
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE15]]
; UNROLL-NO-IC: pred.udiv.continue15:
; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE13]] ], [ [[TMP30]], [[PRED_UDIV_IF14]] ]
; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP11]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP32]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]]
; UNROLL-NO-IC: pred.udiv.if16:
; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = udiv i32 219220132, [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP33]], i32 1
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE17]]
; UNROLL-NO-IC: pred.udiv.continue17:
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = phi <4 x i32> [ [[TMP31]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP34]], [[PRED_UDIV_IF16]] ]
; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP11]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP36]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19:%.*]]
; UNROLL-NO-IC: pred.udiv.if18:
; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = udiv i32 219220132, [[TMP8]]
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = insertelement <4 x i32> [[TMP35]], i32 [[TMP37]], i32 2
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE19]]
; UNROLL-NO-IC: pred.udiv.continue19:
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP35]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP38]], [[PRED_UDIV_IF18]] ]
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP11]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP40]], label [[PRED_UDIV_IF20:%.*]], label [[PRED_UDIV_CONTINUE21:%.*]]
; UNROLL-NO-IC: pred.udiv.if20:
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = udiv i32 219220132, [[TMP9]]
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP41]], i32 3
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE21]]
; UNROLL-NO-IC: pred.udiv.continue21:
; UNROLL-NO-IC-NEXT: [[TMP43]] = phi <4 x i32> [ [[TMP39]], [[PRED_UDIV_CONTINUE19]] ], [ [[TMP42]], [[PRED_UDIV_IF20]] ]
; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP27]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = shufflevector <4 x i32> [[TMP27]], <4 x i32> [[TMP43]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP46]] = add <4 x i32> [[VEC_PHI]], [[TMP44]]
; UNROLL-NO-IC-NEXT: [[TMP47]] = add <4 x i32> [[VEC_PHI7]], [[TMP45]]
; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP48]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NO-IC: pred.store.if:
; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = add i32 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP49]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP2]], i32* [[TMP50]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NO-IC: pred.store.continue:
; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP51]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
; UNROLL-NO-IC: pred.store.if22:
; UNROLL-NO-IC-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP52]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP3]], i32* [[TMP53]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE23]]
; UNROLL-NO-IC: pred.store.continue23:
; UNROLL-NO-IC-NEXT: [[TMP54:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP54]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
; UNROLL-NO-IC: pred.store.if24:
; UNROLL-NO-IC-NEXT: [[TMP55:%.*]] = add i32 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP55]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP4]], i32* [[TMP56]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE25]]
; UNROLL-NO-IC: pred.store.continue25:
; UNROLL-NO-IC-NEXT: [[TMP57:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP57]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]]
; UNROLL-NO-IC: pred.store.if26:
; UNROLL-NO-IC-NEXT: [[TMP58:%.*]] = add i32 [[INDEX]], 3
; UNROLL-NO-IC-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP58]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP5]], i32* [[TMP59]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE27]]
; UNROLL-NO-IC: pred.store.continue27:
; UNROLL-NO-IC-NEXT: [[TMP60:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP60]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]]
; UNROLL-NO-IC: pred.store.if28:
; UNROLL-NO-IC-NEXT: [[TMP61:%.*]] = add i32 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP61]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP6]], i32* [[TMP62]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE29]]
; UNROLL-NO-IC: pred.store.continue29:
; UNROLL-NO-IC-NEXT: [[TMP63:%.*]] = extractelement <4 x i1> [[TMP11]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP63]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]]
; UNROLL-NO-IC: pred.store.if30:
; UNROLL-NO-IC-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 5
; UNROLL-NO-IC-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP64]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP7]], i32* [[TMP65]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE31]]
; UNROLL-NO-IC: pred.store.continue31:
; UNROLL-NO-IC-NEXT: [[TMP66:%.*]] = extractelement <4 x i1> [[TMP11]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP66]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]]
; UNROLL-NO-IC: pred.store.if32:
; UNROLL-NO-IC-NEXT: [[TMP67:%.*]] = add i32 [[INDEX]], 6
; UNROLL-NO-IC-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP67]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP8]], i32* [[TMP68]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE33]]
; UNROLL-NO-IC: pred.store.continue33:
; UNROLL-NO-IC-NEXT: [[TMP69:%.*]] = extractelement <4 x i1> [[TMP11]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP69]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35]]
; UNROLL-NO-IC: pred.store.if34:
; UNROLL-NO-IC-NEXT: [[TMP70:%.*]] = add i32 [[INDEX]], 7
; UNROLL-NO-IC-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP70]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP9]], i32* [[TMP71]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE35]]
; UNROLL-NO-IC: pred.store.continue35:
; UNROLL-NO-IC-NEXT: [[TMP72:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
; UNROLL-NO-IC-NEXT: [[TMP73:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI7]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 -4, i32 -4, i32 -4, i32 -4>
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], <i32 4, i32 4, i32 4, i32 4>
; UNROLL-NO-IC-NEXT: [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP73]], [[TMP72]]
; UNROLL-NO-IC-NEXT: [[TMP75:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP43]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP43]], i32 2
; UNROLL-NO-IC-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP75]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-IC: bb1:
; UNROLL-NO-IC-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP75]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[VAR]]
; UNROLL-NO-IC: bb2:
; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; UNROLL-NO-IC-NEXT: store i32 [[VAR3]], i32* [[G]], align 4
; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]]
;
; UNROLL-NO-VF-LABEL: @sink_into_replication_region_multiple(
; UNROLL-NO-VF-NEXT: bb:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-VF-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_STORE_CONTINUE9]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_STORE_CONTINUE9]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE9]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION2:%.*]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NO-VF-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION4:%.*]] = add i32 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDUCTION3]], [[TRIP_COUNT_MINUS_1]]
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDUCTION4]], [[TRIP_COUNT_MINUS_1]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; UNROLL-NO-VF: pred.udiv.if:
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = udiv i32 219220132, [[INDUCTION]]
; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE]]
; UNROLL-NO-VF: pred.udiv.continue:
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
; UNROLL-NO-VF: pred.udiv.if6:
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION2]]
; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE7]]
; UNROLL-NO-VF: pred.udiv.continue7:
; UNROLL-NO-VF-NEXT: [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF6]] ]
; UNROLL-NO-VF-NEXT: [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
; UNROLL-NO-VF-NEXT: [[TMP9]] = add i32 [[VEC_PHI5]], [[TMP5]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NO-VF: pred.store.if:
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDUCTION3]]
; UNROLL-NO-VF-NEXT: store i32 [[INDUCTION]], i32* [[TMP10]], align 4
; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NO-VF: pred.store.continue:
; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
; UNROLL-NO-VF: pred.store.if8:
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[INDUCTION4]]
; UNROLL-NO-VF-NEXT: store i32 [[INDUCTION2]], i32* [[TMP11]], align 4
; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE9]]
; UNROLL-NO-VF: pred.store.continue9:
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]]
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI5]]
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF51]], !llvm.loop [[LOOP55:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]]
; UNROLL-NO-VF-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-VF: bb1:
; UNROLL-NO-VF-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[VAR]]
; UNROLL-NO-VF: bb2:
; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; UNROLL-NO-VF-NEXT: store i32 [[VAR3]], i32* [[G]], align 4
; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF53]], !llvm.loop [[LOOP56:![0-9]+]]
;
; SINK-AFTER-LABEL: @sink_into_replication_region_multiple(
; SINK-AFTER-NEXT: bb:
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; SINK-AFTER-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; SINK-AFTER-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
; SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
; SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE15]] ]
; SINK-AFTER-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[PRED_STORE_CONTINUE15]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_STORE_CONTINUE15]] ]
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_STORE_CONTINUE15]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp ule <4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT]]
; SINK-AFTER-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; SINK-AFTER-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; SINK-AFTER: pred.udiv.if:
; SINK-AFTER-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[TMP2]]
; SINK-AFTER-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE]]
; SINK-AFTER: pred.udiv.continue:
; SINK-AFTER-NEXT: [[TMP10:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UDIV_IF]] ]
; SINK-AFTER-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
; SINK-AFTER-NEXT: br i1 [[TMP11]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
; SINK-AFTER: pred.udiv.if4:
; SINK-AFTER-NEXT: [[TMP12:%.*]] = udiv i32 219220132, [[TMP3]]
; SINK-AFTER-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP12]], i32 1
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE5]]
; SINK-AFTER: pred.udiv.continue5:
; SINK-AFTER-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF4]] ]
; SINK-AFTER-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
; SINK-AFTER-NEXT: br i1 [[TMP15]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
; SINK-AFTER: pred.udiv.if6:
; SINK-AFTER-NEXT: [[TMP16:%.*]] = udiv i32 219220132, [[TMP4]]
; SINK-AFTER-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP16]], i32 2
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE7]]
; SINK-AFTER: pred.udiv.continue7:
; SINK-AFTER-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP17]], [[PRED_UDIV_IF6]] ]
; SINK-AFTER-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
; SINK-AFTER-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
; SINK-AFTER: pred.udiv.if8:
; SINK-AFTER-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP5]]
; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP20]], i32 3
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE9]]
; SINK-AFTER: pred.udiv.continue9:
; SINK-AFTER-NEXT: [[TMP22]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP21]], [[PRED_UDIV_IF8]] ]
; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]]
; SINK-AFTER-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; SINK-AFTER-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; SINK-AFTER: pred.store.if:
; SINK-AFTER-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP26]]
; SINK-AFTER-NEXT: store i32 [[TMP2]], i32* [[TMP27]], align 4
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE]]
; SINK-AFTER: pred.store.continue:
; SINK-AFTER-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
; SINK-AFTER-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
; SINK-AFTER: pred.store.if10:
; SINK-AFTER-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], 1
; SINK-AFTER-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP29]]
; SINK-AFTER-NEXT: store i32 [[TMP3]], i32* [[TMP30]], align 4
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE11]]
; SINK-AFTER: pred.store.continue11:
; SINK-AFTER-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
; SINK-AFTER-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
; SINK-AFTER: pred.store.if12:
; SINK-AFTER-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 2
; SINK-AFTER-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP32]]
; SINK-AFTER-NEXT: store i32 [[TMP4]], i32* [[TMP33]], align 4
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE13]]
; SINK-AFTER: pred.store.continue13:
; SINK-AFTER-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
; SINK-AFTER-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
; SINK-AFTER: pred.store.if14:
; SINK-AFTER-NEXT: [[TMP35:%.*]] = add i32 [[INDEX]], 3
; SINK-AFTER-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP35]]
; SINK-AFTER-NEXT: store i32 [[TMP5]], i32* [[TMP36]], align 4
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE15]]
; SINK-AFTER: pred.store.continue15:
; SINK-AFTER-NEXT: [[TMP37:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
; SINK-AFTER-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
; SINK-AFTER-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]])
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
; SINK-AFTER-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: br label [[BB2:%.*]]
; SINK-AFTER: bb1:
; SINK-AFTER-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: ret i32 [[VAR]]
; SINK-AFTER: bb2:
; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; SINK-AFTER-NEXT: store i32 [[VAR3]], i32* [[G]], align 4
; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; SINK-AFTER-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]]
;
; NO-SINK-AFTER-LABEL: @sink_into_replication_region_multiple(
; NO-SINK-AFTER-NEXT: bb:
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; NO-SINK-AFTER-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; NO-SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
; NO-SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; NO-SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; NO-SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; NO-SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; NO-SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; NO-SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
; NO-SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; NO-SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
; NO-SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE15]] ]
; NO-SINK-AFTER-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[PRED_STORE_CONTINUE15]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_STORE_CONTINUE15]] ]
; NO-SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_STORE_CONTINUE15]] ]
; NO-SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
; NO-SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp ule <4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT]]
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; NO-SINK-AFTER-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; NO-SINK-AFTER: pred.udiv.if:
; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[TMP2]]
; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
; NO-SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE]]
; NO-SINK-AFTER: pred.udiv.continue:
; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UDIV_IF]] ]
; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
; NO-SINK-AFTER-NEXT: br i1 [[TMP11]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
; NO-SINK-AFTER: pred.udiv.if4:
; NO-SINK-AFTER-NEXT: [[TMP12:%.*]] = udiv i32 219220132, [[TMP3]]
; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP12]], i32 1
; NO-SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE5]]
; NO-SINK-AFTER: pred.udiv.continue5:
; NO-SINK-AFTER-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF4]] ]
; NO-SINK-AFTER-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[TMP15]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
; NO-SINK-AFTER: pred.udiv.if6:
; NO-SINK-AFTER-NEXT: [[TMP16:%.*]] = udiv i32 219220132, [[TMP4]]
; NO-SINK-AFTER-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP16]], i32 2
; NO-SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE7]]
; NO-SINK-AFTER: pred.udiv.continue7:
; NO-SINK-AFTER-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP17]], [[PRED_UDIV_IF6]] ]
; NO-SINK-AFTER-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
; NO-SINK-AFTER-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
; NO-SINK-AFTER: pred.udiv.if8:
; NO-SINK-AFTER-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP5]]
; NO-SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP20]], i32 3
; NO-SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE9]]
; NO-SINK-AFTER: pred.udiv.continue9:
; NO-SINK-AFTER-NEXT: [[TMP22]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP21]], [[PRED_UDIV_IF8]] ]
; NO-SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]]
; NO-SINK-AFTER-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; NO-SINK-AFTER-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; NO-SINK-AFTER: pred.store.if:
; NO-SINK-AFTER-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 0
; NO-SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP26]]
; NO-SINK-AFTER-NEXT: store i32 [[TMP2]], i32* [[TMP27]], align 4
; NO-SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE]]
; NO-SINK-AFTER: pred.store.continue:
; NO-SINK-AFTER-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
; NO-SINK-AFTER-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
; NO-SINK-AFTER: pred.store.if10:
; NO-SINK-AFTER-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], 1
; NO-SINK-AFTER-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP29]]
; NO-SINK-AFTER-NEXT: store i32 [[TMP3]], i32* [[TMP30]], align 4
; NO-SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE11]]
; NO-SINK-AFTER: pred.store.continue11:
; NO-SINK-AFTER-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
; NO-SINK-AFTER: pred.store.if12:
; NO-SINK-AFTER-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 2
; NO-SINK-AFTER-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP32]]
; NO-SINK-AFTER-NEXT: store i32 [[TMP4]], i32* [[TMP33]], align 4
; NO-SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE13]]
; NO-SINK-AFTER: pred.store.continue13:
; NO-SINK-AFTER-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
; NO-SINK-AFTER-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
; NO-SINK-AFTER: pred.store.if14:
; NO-SINK-AFTER-NEXT: [[TMP35:%.*]] = add i32 [[INDEX]], 3
; NO-SINK-AFTER-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP35]]
; NO-SINK-AFTER-NEXT: store i32 [[TMP5]], i32* [[TMP36]], align 4
; NO-SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE15]]
; NO-SINK-AFTER: pred.store.continue15:
; NO-SINK-AFTER-NEXT: [[TMP37:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
; NO-SINK-AFTER-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
; NO-SINK-AFTER-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; NO-SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]])
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
; NO-SINK-AFTER-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
; NO-SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: br label [[BB2:%.*]]
; NO-SINK-AFTER: bb1:
; NO-SINK-AFTER-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: ret i32 [[VAR]]
; NO-SINK-AFTER: bb2:
; NO-SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; NO-SINK-AFTER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; NO-SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; NO-SINK-AFTER-NEXT: [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
; NO-SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
; NO-SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; NO-SINK-AFTER-NEXT: store i32 [[VAR3]], i32* [[G]], align 4
; NO-SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; NO-SINK-AFTER-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; NO-SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; NO-SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]]
;
bb:
br label %bb2
bb1: ; preds = %bb2
%var = phi i32 [ %var6, %bb2 ]
ret i32 %var
bb2: ; preds = %bb2, %bb
%var3 = phi i32 [ %var8, %bb2 ], [ %y, %bb ]
%iv = phi i32 [ %iv.next, %bb2 ], [ 0, %bb ]
%var4 = phi i32 [ %var7, %bb2 ], [ 0, %bb ]
%var5 = phi i32 [ %var6, %bb2 ], [ 0, %bb ]
%g = getelementptr inbounds i32, i32* %x, i32 %iv
%var6 = add i32 %var5, %var4
%var7 = udiv i32 219220132, %var3
store i32 %var3, i32* %g, align 4
%var8 = add nsw i32 %var3, -1
%iv.next = add nsw i32 %iv, 1
%var9 = icmp slt i32 %var3, 2
br i1 %var9, label %bb1, label %bb2, !prof !2
}
; %vec.dead will be marked as dead instruction in the vector loop and no recipe
; will be created for it. Make sure a valid sink target is used.
define void @sink_after_dead_inst(i32* %A.ptr) {
; CHECK-LABEL: @sink_after_dead_inst(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = zext i32 [[INDEX]] to i64
; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[OFFSET_IDX]], 48
; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 48
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; UNROLL-LABEL: @sink_after_dead_inst(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = zext i32 [[INDEX]] to i64
; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[OFFSET_IDX]], 48
; UNROLL-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 48
; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i64 [[TMP0]]
; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP2]], align 4
; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP1]], i64 4
; UNROLL-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
; UNROLL-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP4]], align 4
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: br label [[LOOP:%.*]]
; UNROLL: loop:
; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
; UNROLL-NO-IC-LABEL: @sink_after_dead_inst(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 4
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 5
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i16 [[OFFSET_IDX]], 6
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 7
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], <i16 1, i16 1, i16 1, i16 1>
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = or <4 x i16> [[TMP8]], [[TMP8]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = or <4 x i16> [[TMP9]], [[TMP9]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = zext <4 x i16> [[TMP10]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP13]] = zext <4 x i16> [[TMP11]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP12]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp eq <4 x i32> [[TMP14]], <i32 15, i32 15, i32 15, i32 15>
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = icmp eq <4 x i32> [[TMP15]], <i32 15, i32 15, i32 15, i32 15>
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = icmp eq <4 x i1> [[TMP16]], <i1 true, i1 true, i1 true, i1 true>
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq <4 x i1> [[TMP17]], <i1 true, i1 true, i1 true, i1 true>
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = and <4 x i1> [[TMP16]], [[TMP18]]
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = and <4 x i1> [[TMP17]], [[TMP19]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = zext <4 x i1> [[TMP20]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = zext <4 x i1> [[TMP21]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr i32, i32* [[TMP24]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP27]], align 4
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr i32, i32* [[TMP24]], i32 4
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP29]], align 4
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 16, 16
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP13]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-IC: loop:
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15
; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true
; UNROLL-NO-IC-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true
; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; UNROLL-NO-IC-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
; UNROLL-NO-IC-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]]
; UNROLL-NO-IC-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32
; UNROLL-NO-IC-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32
; UNROLL-NO-IC-NEXT: [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
; UNROLL-NO-IC-NEXT: store i32 0, i32* [[A_GEP]], align 4
; UNROLL-NO-IC-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @sink_after_dead_inst(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i16 [[OFFSET_IDX]], 0
; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i16 [[INDUCTION]], 1
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[INDUCTION1]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = or i16 [[TMP0]], [[TMP0]]
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = or i16 [[TMP1]], [[TMP1]]
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32
; UNROLL-NO-VF-NEXT: [[TMP5]] = zext i16 [[TMP3]] to i32
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[VECTOR_RECUR]], 15
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP4]], 15
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = icmp eq i1 [[TMP6]], true
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp eq i1 [[TMP7]], true
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = and i1 [[TMP6]], [[TMP8]]
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = and i1 [[TMP7]], [[TMP9]]
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = zext i1 [[TMP10]] to i32
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = zext i1 [[TMP11]] to i32
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[INDUCTION]]
; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[INDUCTION1]]
; UNROLL-NO-VF-NEXT: store i32 0, i32* [[TMP14]], align 4
; UNROLL-NO-VF-NEXT: store i32 0, i32* [[TMP15]], align 4
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP57:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 16, 16
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-VF: loop:
; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15
; UNROLL-NO-VF-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true
; UNROLL-NO-VF-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true
; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; UNROLL-NO-VF-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
; UNROLL-NO-VF-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]]
; UNROLL-NO-VF-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32
; UNROLL-NO-VF-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32
; UNROLL-NO-VF-NEXT: [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
; UNROLL-NO-VF-NEXT: store i32 0, i32* [[A_GEP]], align 4
; UNROLL-NO-VF-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP58:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @sink_after_dead_inst(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
; SINK-AFTER-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP4]], [[TMP4]]
; SINK-AFTER-NEXT: [[TMP6]] = zext <4 x i16> [[TMP5]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP6]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[TMP7]], <i32 15, i32 15, i32 15, i32 15>
; SINK-AFTER-NEXT: [[TMP9:%.*]] = icmp eq <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
; SINK-AFTER-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP8]], [[TMP9]]
; SINK-AFTER-NEXT: [[TMP11:%.*]] = zext <4 x i1> [[TMP10]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]]
; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 0
; SINK-AFTER-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
; SINK-AFTER-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP14]], align 4
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
; SINK-AFTER-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; SINK-AFTER-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 16, 16
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[LOOP:%.*]]
; SINK-AFTER: loop:
; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15
; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true
; SINK-AFTER-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true
; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; SINK-AFTER-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
; SINK-AFTER-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]]
; SINK-AFTER-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32
; SINK-AFTER-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32
; SINK-AFTER-NEXT: [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
; SINK-AFTER-NEXT: store i32 0, i32* [[A_GEP]], align 4
; SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
; NO-SINK-AFTER-LABEL: @sink_after_dead_inst(
; NO-SINK-AFTER-NEXT: entry:
; NO-SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-SINK-AFTER: vector.ph:
; NO-SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-SINK-AFTER: vector.body:
; NO-SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; NO-SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; NO-SINK-AFTER-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
; NO-SINK-AFTER-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1
; NO-SINK-AFTER-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2
; NO-SINK-AFTER-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3
; NO-SINK-AFTER-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
; NO-SINK-AFTER-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP4]], [[TMP4]]
; NO-SINK-AFTER-NEXT: [[TMP6]] = zext <4 x i16> [[TMP5]] to <4 x i32>
; NO-SINK-AFTER-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP6]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NO-SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[TMP7]], <i32 15, i32 15, i32 15, i32 15>
; NO-SINK-AFTER-NEXT: [[TMP9:%.*]] = icmp eq <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
; NO-SINK-AFTER-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP8]], [[TMP9]]
; NO-SINK-AFTER-NEXT: [[TMP11:%.*]] = zext <4 x i1> [[TMP10]] to <4 x i32>
; NO-SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]]
; NO-SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 0
; NO-SINK-AFTER-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
; NO-SINK-AFTER-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP14]], align 4
; NO-SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; NO-SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
; NO-SINK-AFTER-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; NO-SINK-AFTER-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]]
; NO-SINK-AFTER: middle.block:
; NO-SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 16, 16
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3
; NO-SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2
; NO-SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-SINK-AFTER: scalar.ph:
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; NO-SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; NO-SINK-AFTER-NEXT: br label [[LOOP:%.*]]
; NO-SINK-AFTER: loop:
; NO-SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; NO-SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
; NO-SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15
; NO-SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true
; NO-SINK-AFTER-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true
; NO-SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; NO-SINK-AFTER-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
; NO-SINK-AFTER-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]]
; NO-SINK-AFTER-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32
; NO-SINK-AFTER-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32
; NO-SINK-AFTER-NEXT: [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
; NO-SINK-AFTER-NEXT: store i32 0, i32* [[A_GEP]], align 4
; NO-SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]]
; NO-SINK-AFTER: for.end:
; NO-SINK-AFTER-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
%for = phi i32 [ 0, %entry ], [ %for.prev, %loop ]
%cmp = icmp eq i32 %for, 15
%C = icmp eq i1 %cmp, true
%vec.dead = and i1 %C, 1
%iv.next = add i16 %iv, 1
%B1 = or i16 %iv.next, %iv.next
%B3 = and i1 %cmp, %C
%for.prev = zext i16 %B1 to i32
%ext = zext i1 %B3 to i32
%A.gep = getelementptr i32, i32* %A.ptr, i16 %iv
store i32 0, i32* %A.gep
br i1 %vec.dead, label %for.end, label %loop
for.end:
ret void
}
!2 = !{!"branch_weights", i32 1, i32 1}