llvm-project/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
Florian Hahn 3026ecaff5
[LV] Also verify loops in vector loop removal tests.
Also verify loop info in tests added in 7d6ec3b9680.
2024-12-31 20:11:23 +00:00

808 lines
48 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF8UF1 %s
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=VF8UF2 %s
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=16 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF16UF1 %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; Check if the vector loop condition can be simplified to true for a given
; VF/IC combination.
define void @test_tc_less_than_16(ptr %A, i64 %N) {
; VF8UF1-LABEL: define void @test_tc_less_than_16(
; VF8UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; VF8UF1-NEXT: [[ENTRY:.*]]:
; VF8UF1-NEXT: [[AND:%.*]] = and i64 [[N]], 15
; VF8UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[AND]], 8
; VF8UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[AND]], 8
; VF8UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[AND]], [[N_MOD_VF]]
; VF8UF1-NEXT: [[TMP0:%.*]] = sub i64 [[AND]], [[N_VEC]]
; VF8UF1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF8UF1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VF8UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
; VF8UF1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
; VF8UF1-NEXT: [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
; VF8UF1-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP3]], align 1
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF8UF1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]
; VF8UF1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VF8UF1: [[SCALAR_PH]]:
; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[AND]], %[[ENTRY]] ]
; VF8UF1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; VF8UF1-NEXT: br label %[[LOOP:.*]]
; VF8UF1: [[LOOP]]:
; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF8UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
; VF8UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF8UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
; VF8UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 0
; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: ret void
;
; VF8UF2-LABEL: define void @test_tc_less_than_16(
; VF8UF2-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; VF8UF2-NEXT: [[ENTRY:.*]]:
; VF8UF2-NEXT: [[AND:%.*]] = and i64 [[N]], 15
; VF8UF2-NEXT: br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[AND]], 16
; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[AND]], [[N_MOD_VF]]
; VF8UF2-NEXT: [[TMP0:%.*]] = sub i64 [[AND]], [[N_VEC]]
; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF8UF2-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; VF8UF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
; VF8UF2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
; VF8UF2-NEXT: [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
; VF8UF2-NEXT: [[TMP5:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP2]], align 1
; VF8UF2-NEXT: store <8 x i8> [[TMP5]], ptr [[TMP3]], align 1
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; VF8UF2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]
; VF8UF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VF8UF2: [[SCALAR_PH]]:
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[AND]], %[[ENTRY]] ]
; VF8UF2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; VF8UF2-NEXT: br label %[[LOOP:.*]]
; VF8UF2: [[LOOP]]:
; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF8UF2-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
; VF8UF2-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF8UF2-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
; VF8UF2-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 0
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: ret void
;
; VF16UF1-LABEL: define void @test_tc_less_than_16(
; VF16UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; VF16UF1-NEXT: [[ENTRY:.*]]:
; VF16UF1-NEXT: [[AND:%.*]] = and i64 [[N]], 15
; VF16UF1-NEXT: br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[AND]], 16
; VF16UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[AND]], [[N_MOD_VF]]
; VF16UF1-NEXT: [[TMP0:%.*]] = sub i64 [[AND]], [[N_VEC]]
; VF16UF1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF16UF1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VF16UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
; VF16UF1-NEXT: [[TMP3:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10)
; VF16UF1-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP2]], align 1
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; VF16UF1-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]
; VF16UF1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VF16UF1: [[SCALAR_PH]]:
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[AND]], %[[ENTRY]] ]
; VF16UF1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; VF16UF1-NEXT: br label %[[LOOP:.*]]
; VF16UF1: [[LOOP]]:
; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF16UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
; VF16UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF16UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
; VF16UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 0
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: ret void
;
entry:
%and = and i64 %N, 15
br label %loop
loop:
%iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ]
%p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
%p.src.next = getelementptr inbounds i8, ptr %p.src, i64 1
%l = load i8, ptr %p.src, align 1
%add = add nsw i8 %l, 10
store i8 %add, ptr %p.src
%iv.next = add nsw i64 %iv, -1
%cmp = icmp eq i64 %iv.next, 0
br i1 %cmp, label %exit, label %loop
exit:
ret void
}
define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, 10) %N) {
; VF8UF1-LABEL: define void @remove_loop_region_with_replicate_recipe(
; VF8UF1-SAME: ptr [[DST:%.*]], i64 range(i64 5, 10) [[N:%.*]]) {
; VF8UF1-NEXT: [[ENTRY:.*]]:
; VF8UF1-NEXT: [[TMP0:%.*]] = add nsw i64 [[N]], -2
; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 7
; VF8UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
; VF8UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; VF8UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
; VF8UF1-NEXT: [[TMP1:%.*]] = add i64 2, [[N_VEC]]
; VF8UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; VF8UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE16:.*]] ]
; VF8UF1-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
; VF8UF1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
; VF8UF1-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
; VF8UF1-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT1]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; VF8UF1-NEXT: [[TMP2:%.*]] = icmp ule <8 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
; VF8UF1-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP2]], i32 0
; VF8UF1-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF1: [[PRED_STORE_IF]]:
; VF8UF1-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF8UF1-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP20]]
; VF8UF1-NEXT: store i16 0, ptr [[TMP4]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF8UF1: [[PRED_STORE_CONTINUE]]:
; VF8UF1-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP2]], i32 1
; VF8UF1-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF8UF1: [[PRED_STORE_IF3]]:
; VF8UF1-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 1
; VF8UF1-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP21]]
; VF8UF1-NEXT: store i16 0, ptr [[TMP6]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; VF8UF1: [[PRED_STORE_CONTINUE4]]:
; VF8UF1-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP2]], i32 2
; VF8UF1-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF8UF1: [[PRED_STORE_IF5]]:
; VF8UF1-NEXT: [[TMP23:%.*]] = add i64 [[OFFSET_IDX]], 2
; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP23]]
; VF8UF1-NEXT: store i16 0, ptr [[TMP8]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; VF8UF1: [[PRED_STORE_CONTINUE6]]:
; VF8UF1-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP2]], i32 3
; VF8UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF8UF1: [[PRED_STORE_IF7]]:
; VF8UF1-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], 3
; VF8UF1-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP24]]
; VF8UF1-NEXT: store i16 0, ptr [[TMP10]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; VF8UF1: [[PRED_STORE_CONTINUE8]]:
; VF8UF1-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP2]], i32 4
; VF8UF1-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF8UF1: [[PRED_STORE_IF9]]:
; VF8UF1-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX]], 4
; VF8UF1-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP26]]
; VF8UF1-NEXT: store i16 0, ptr [[TMP12]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; VF8UF1: [[PRED_STORE_CONTINUE10]]:
; VF8UF1-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP2]], i32 5
; VF8UF1-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF8UF1: [[PRED_STORE_IF11]]:
; VF8UF1-NEXT: [[TMP19:%.*]] = add i64 [[OFFSET_IDX]], 5
; VF8UF1-NEXT: [[TMP14:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP19]]
; VF8UF1-NEXT: store i16 0, ptr [[TMP14]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; VF8UF1: [[PRED_STORE_CONTINUE12]]:
; VF8UF1-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP2]], i32 6
; VF8UF1-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF8UF1: [[PRED_STORE_IF13]]:
; VF8UF1-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], 6
; VF8UF1-NEXT: [[TMP16:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP22]]
; VF8UF1-NEXT: store i16 0, ptr [[TMP16]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
; VF8UF1: [[PRED_STORE_CONTINUE14]]:
; VF8UF1-NEXT: [[TMP17:%.*]] = extractelement <8 x i1> [[TMP2]], i32 7
; VF8UF1-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16]]
; VF8UF1: [[PRED_STORE_IF15]]:
; VF8UF1-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 7
; VF8UF1-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP25]]
; VF8UF1-NEXT: store i16 0, ptr [[TMP18]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE16]]
; VF8UF1: [[PRED_STORE_CONTINUE16]]:
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VF8UF1-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VF8UF1: [[SCALAR_PH]]:
; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; VF8UF1-NEXT: br label %[[LOOP:.*]]
; VF8UF1: [[LOOP]]:
; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF8UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV]]
; VF8UF1-NEXT: store i16 0, ptr [[GEP_DST]], align 2
; VF8UF1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; VF8UF1-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; VF8UF1-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: ret void
;
; VF8UF2-LABEL: define void @remove_loop_region_with_replicate_recipe(
; VF8UF2-SAME: ptr [[DST:%.*]], i64 range(i64 5, 10) [[N:%.*]]) {
; VF8UF2-NEXT: [[ENTRY:.*]]:
; VF8UF2-NEXT: [[TMP0:%.*]] = add nsw i64 [[N]], -2
; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 15
; VF8UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; VF8UF2-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
; VF8UF2-NEXT: [[TMP1:%.*]] = add i64 2, [[N_VEC]]
; VF8UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; VF8UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ]
; VF8UF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
; VF8UF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
; VF8UF2-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
; VF8UF2-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT1]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; VF8UF2-NEXT: [[VEC_IV3:%.*]] = add <8 x i64> [[BROADCAST_SPLAT1]], <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
; VF8UF2-NEXT: [[TMP2:%.*]] = icmp ule <8 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp ule <8 x i64> [[VEC_IV3]], [[BROADCAST_SPLAT]]
; VF8UF2-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP2]], i32 0
; VF8UF2-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF2: [[PRED_STORE_IF]]:
; VF8UF2-NEXT: [[TMP36:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP36]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP5]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF8UF2: [[PRED_STORE_CONTINUE]]:
; VF8UF2-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP2]], i32 1
; VF8UF2-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
; VF8UF2: [[PRED_STORE_IF6]]:
; VF8UF2-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 1
; VF8UF2-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP37]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP7]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE7]]
; VF8UF2: [[PRED_STORE_CONTINUE7]]:
; VF8UF2-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP2]], i32 2
; VF8UF2-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
; VF8UF2: [[PRED_STORE_IF8]]:
; VF8UF2-NEXT: [[TMP39:%.*]] = add i64 [[OFFSET_IDX]], 2
; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP39]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP9]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE9]]
; VF8UF2: [[PRED_STORE_CONTINUE9]]:
; VF8UF2-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP2]], i32 3
; VF8UF2-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]]
; VF8UF2: [[PRED_STORE_IF10]]:
; VF8UF2-NEXT: [[TMP40:%.*]] = add i64 [[OFFSET_IDX]], 3
; VF8UF2-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP40]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP11]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE11]]
; VF8UF2: [[PRED_STORE_CONTINUE11]]:
; VF8UF2-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP2]], i32 4
; VF8UF2-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
; VF8UF2: [[PRED_STORE_IF12]]:
; VF8UF2-NEXT: [[TMP42:%.*]] = add i64 [[OFFSET_IDX]], 4
; VF8UF2-NEXT: [[TMP13:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP42]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP13]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE13]]
; VF8UF2: [[PRED_STORE_CONTINUE13]]:
; VF8UF2-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP2]], i32 5
; VF8UF2-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
; VF8UF2: [[PRED_STORE_IF14]]:
; VF8UF2-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], 5
; VF8UF2-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP43]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP15]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE15]]
; VF8UF2: [[PRED_STORE_CONTINUE15]]:
; VF8UF2-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP2]], i32 6
; VF8UF2-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
; VF8UF2: [[PRED_STORE_IF16]]:
; VF8UF2-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 6
; VF8UF2-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP45]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP17]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE17]]
; VF8UF2: [[PRED_STORE_CONTINUE17]]:
; VF8UF2-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[TMP2]], i32 7
; VF8UF2-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
; VF8UF2: [[PRED_STORE_IF18]]:
; VF8UF2-NEXT: [[TMP46:%.*]] = add i64 [[OFFSET_IDX]], 7
; VF8UF2-NEXT: [[TMP19:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP46]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP19]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE19]]
; VF8UF2: [[PRED_STORE_CONTINUE19]]:
; VF8UF2-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF2-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
; VF8UF2: [[PRED_STORE_IF20]]:
; VF8UF2-NEXT: [[TMP48:%.*]] = add i64 [[OFFSET_IDX]], 8
; VF8UF2-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP48]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP21]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE21]]
; VF8UF2: [[PRED_STORE_CONTINUE21]]:
; VF8UF2-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; VF8UF2-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
; VF8UF2: [[PRED_STORE_IF22]]:
; VF8UF2-NEXT: [[TMP49:%.*]] = add i64 [[OFFSET_IDX]], 9
; VF8UF2-NEXT: [[TMP23:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP49]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP23]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE23]]
; VF8UF2: [[PRED_STORE_CONTINUE23]]:
; VF8UF2-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
; VF8UF2-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
; VF8UF2: [[PRED_STORE_IF24]]:
; VF8UF2-NEXT: [[TMP51:%.*]] = add i64 [[OFFSET_IDX]], 10
; VF8UF2-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP51]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP25]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE25]]
; VF8UF2: [[PRED_STORE_CONTINUE25]]:
; VF8UF2-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
; VF8UF2-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]]
; VF8UF2: [[PRED_STORE_IF26]]:
; VF8UF2-NEXT: [[TMP38:%.*]] = add i64 [[OFFSET_IDX]], 11
; VF8UF2-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP38]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP27]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE27]]
; VF8UF2: [[PRED_STORE_CONTINUE27]]:
; VF8UF2-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
; VF8UF2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]]
; VF8UF2: [[PRED_STORE_IF28]]:
; VF8UF2-NEXT: [[TMP41:%.*]] = add i64 [[OFFSET_IDX]], 12
; VF8UF2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP41]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP29]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE29]]
; VF8UF2: [[PRED_STORE_CONTINUE29]]:
; VF8UF2-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
; VF8UF2-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]]
; VF8UF2: [[PRED_STORE_IF30]]:
; VF8UF2-NEXT: [[TMP44:%.*]] = add i64 [[OFFSET_IDX]], 13
; VF8UF2-NEXT: [[TMP31:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP44]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP31]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE31]]
; VF8UF2: [[PRED_STORE_CONTINUE31]]:
; VF8UF2-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
; VF8UF2-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
; VF8UF2: [[PRED_STORE_IF32]]:
; VF8UF2-NEXT: [[TMP47:%.*]] = add i64 [[OFFSET_IDX]], 14
; VF8UF2-NEXT: [[TMP33:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP47]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP33]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE33]]
; VF8UF2: [[PRED_STORE_CONTINUE33]]:
; VF8UF2-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF2-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35]]
; VF8UF2: [[PRED_STORE_IF34]]:
; VF8UF2-NEXT: [[TMP50:%.*]] = add i64 [[OFFSET_IDX]], 15
; VF8UF2-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP50]]
; VF8UF2-NEXT: store i16 0, ptr [[TMP35]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE35]]
; VF8UF2: [[PRED_STORE_CONTINUE35]]:
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; VF8UF2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VF8UF2: [[SCALAR_PH]]:
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; VF8UF2-NEXT: br label %[[LOOP:.*]]
; VF8UF2: [[LOOP]]:
; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF8UF2-NEXT: [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV]]
; VF8UF2-NEXT: store i16 0, ptr [[GEP_DST]], align 2
; VF8UF2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; VF8UF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; VF8UF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: ret void
;
; VF16UF1-LABEL: define void @remove_loop_region_with_replicate_recipe(
; VF16UF1-SAME: ptr [[DST:%.*]], i64 range(i64 5, 10) [[N:%.*]]) {
; VF16UF1-NEXT: [[ENTRY:.*]]:
; VF16UF1-NEXT: [[TMP0:%.*]] = add nsw i64 [[N]], -2
; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 15
; VF16UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
; VF16UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; VF16UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
; VF16UF1-NEXT: [[TMP1:%.*]] = add i64 2, [[N_VEC]]
; VF16UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; VF16UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE32:.*]] ]
; VF16UF1-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
; VF16UF1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[INDEX]], i64 0
; VF16UF1-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
; VF16UF1-NEXT: [[VEC_IV:%.*]] = add <16 x i64> [[BROADCAST_SPLAT1]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
; VF16UF1-NEXT: [[TMP2:%.*]] = icmp ule <16 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
; VF16UF1-NEXT: [[TMP3:%.*]] = extractelement <16 x i1> [[TMP2]], i32 0
; VF16UF1-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF16UF1: [[PRED_STORE_IF]]:
; VF16UF1-NEXT: [[TMP35:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF16UF1-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP35]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP4]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF16UF1: [[PRED_STORE_CONTINUE]]:
; VF16UF1-NEXT: [[TMP5:%.*]] = extractelement <16 x i1> [[TMP2]], i32 1
; VF16UF1-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF16UF1: [[PRED_STORE_IF3]]:
; VF16UF1-NEXT: [[TMP36:%.*]] = add i64 [[OFFSET_IDX]], 1
; VF16UF1-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP36]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP6]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; VF16UF1: [[PRED_STORE_CONTINUE4]]:
; VF16UF1-NEXT: [[TMP7:%.*]] = extractelement <16 x i1> [[TMP2]], i32 2
; VF16UF1-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF16UF1: [[PRED_STORE_IF5]]:
; VF16UF1-NEXT: [[TMP38:%.*]] = add i64 [[OFFSET_IDX]], 2
; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP38]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP8]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; VF16UF1: [[PRED_STORE_CONTINUE6]]:
; VF16UF1-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP2]], i32 3
; VF16UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF16UF1: [[PRED_STORE_IF7]]:
; VF16UF1-NEXT: [[TMP39:%.*]] = add i64 [[OFFSET_IDX]], 3
; VF16UF1-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP39]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP10]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; VF16UF1: [[PRED_STORE_CONTINUE8]]:
; VF16UF1-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP2]], i32 4
; VF16UF1-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF16UF1: [[PRED_STORE_IF9]]:
; VF16UF1-NEXT: [[TMP41:%.*]] = add i64 [[OFFSET_IDX]], 4
; VF16UF1-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP41]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP12]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; VF16UF1: [[PRED_STORE_CONTINUE10]]:
; VF16UF1-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP2]], i32 5
; VF16UF1-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF16UF1: [[PRED_STORE_IF11]]:
; VF16UF1-NEXT: [[TMP42:%.*]] = add i64 [[OFFSET_IDX]], 5
; VF16UF1-NEXT: [[TMP14:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP42]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP14]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; VF16UF1: [[PRED_STORE_CONTINUE12]]:
; VF16UF1-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP2]], i32 6
; VF16UF1-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF16UF1: [[PRED_STORE_IF13]]:
; VF16UF1-NEXT: [[TMP44:%.*]] = add i64 [[OFFSET_IDX]], 6
; VF16UF1-NEXT: [[TMP16:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP44]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP16]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
; VF16UF1: [[PRED_STORE_CONTINUE14]]:
; VF16UF1-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP2]], i32 7
; VF16UF1-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; VF16UF1: [[PRED_STORE_IF15]]:
; VF16UF1-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 7
; VF16UF1-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP45]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP18]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE16]]
; VF16UF1: [[PRED_STORE_CONTINUE16]]:
; VF16UF1-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP2]], i32 8
; VF16UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; VF16UF1: [[PRED_STORE_IF17]]:
; VF16UF1-NEXT: [[TMP47:%.*]] = add i64 [[OFFSET_IDX]], 8
; VF16UF1-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP47]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP20]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE18]]
; VF16UF1: [[PRED_STORE_CONTINUE18]]:
; VF16UF1-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP2]], i32 9
; VF16UF1-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; VF16UF1: [[PRED_STORE_IF19]]:
; VF16UF1-NEXT: [[TMP48:%.*]] = add i64 [[OFFSET_IDX]], 9
; VF16UF1-NEXT: [[TMP22:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP48]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP22]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE20]]
; VF16UF1: [[PRED_STORE_CONTINUE20]]:
; VF16UF1-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP2]], i32 10
; VF16UF1-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; VF16UF1: [[PRED_STORE_IF21]]:
; VF16UF1-NEXT: [[TMP50:%.*]] = add i64 [[OFFSET_IDX]], 10
; VF16UF1-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP50]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP24]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE22]]
; VF16UF1: [[PRED_STORE_CONTINUE22]]:
; VF16UF1-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP2]], i32 11
; VF16UF1-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; VF16UF1: [[PRED_STORE_IF23]]:
; VF16UF1-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 11
; VF16UF1-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP37]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP26]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE24]]
; VF16UF1: [[PRED_STORE_CONTINUE24]]:
; VF16UF1-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP2]], i32 12
; VF16UF1-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; VF16UF1: [[PRED_STORE_IF25]]:
; VF16UF1-NEXT: [[TMP40:%.*]] = add i64 [[OFFSET_IDX]], 12
; VF16UF1-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP40]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP28]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE26]]
; VF16UF1: [[PRED_STORE_CONTINUE26]]:
; VF16UF1-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP2]], i32 13
; VF16UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; VF16UF1: [[PRED_STORE_IF27]]:
; VF16UF1-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], 13
; VF16UF1-NEXT: [[TMP30:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP43]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP30]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE28]]
; VF16UF1: [[PRED_STORE_CONTINUE28]]:
; VF16UF1-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP2]], i32 14
; VF16UF1-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; VF16UF1: [[PRED_STORE_IF29]]:
; VF16UF1-NEXT: [[TMP46:%.*]] = add i64 [[OFFSET_IDX]], 14
; VF16UF1-NEXT: [[TMP32:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP46]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP32]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE30]]
; VF16UF1: [[PRED_STORE_CONTINUE30]]:
; VF16UF1-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP2]], i32 15
; VF16UF1-NEXT: br i1 [[TMP33]], label %[[PRED_STORE_IF31:.*]], label %[[PRED_STORE_CONTINUE32]]
; VF16UF1: [[PRED_STORE_IF31]]:
; VF16UF1-NEXT: [[TMP49:%.*]] = add i64 [[OFFSET_IDX]], 15
; VF16UF1-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP49]]
; VF16UF1-NEXT: store i16 0, ptr [[TMP34]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE32]]
; VF16UF1: [[PRED_STORE_CONTINUE32]]:
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; VF16UF1-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VF16UF1: [[SCALAR_PH]]:
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; VF16UF1-NEXT: br label %[[LOOP:.*]]
; VF16UF1: [[LOOP]]:
; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF16UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV]]
; VF16UF1-NEXT: store i16 0, ptr [[GEP_DST]], align 2
; VF16UF1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; VF16UF1-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; VF16UF1-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ]
%gep.dst = getelementptr i16, ptr %dst, i64 %iv
store i16 0, ptr %gep.dst, align 2
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv.next, %N
br i1 %ec, label %exit, label %loop
exit:
ret void
}
declare i1 @cond()
define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias %src, ptr %dst) {
; VF8UF1-LABEL: define void @remove_loop_region_outer_loop(
; VF8UF1-SAME: i64 range(i64 8, 17) [[N:%.*]], ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
; VF8UF1-NEXT: [[ENTRY:.*]]:
; VF8UF1-NEXT: br label %[[OUTER_HEADER:.*]]
; VF8UF1: [[OUTER_HEADER]]:
; VF8UF1-NEXT: [[OUTER_IV:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; VF8UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF8UF1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VF8UF1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[TMP0]]
; VF8UF1-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
; VF8UF1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
; VF8UF1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i32 0
; VF8UF1-NEXT: store <8 x i8> [[WIDE_LOAD]], ptr [[TMP4]], align 1
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF8UF1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; VF8UF1-NEXT: br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH]]
; VF8UF1: [[SCALAR_PH]]:
; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_HEADER]] ]
; VF8UF1-NEXT: br label %[[INNER:.*]]
; VF8UF1: [[INNER]]:
; VF8UF1-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ]
; VF8UF1-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[INNER_IV]]
; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
; VF8UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INNER_IV]]
; VF8UF1-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1
; VF8UF1-NEXT: [[IV_NEXT]] = add i64 [[INNER_IV]], 1
; VF8UF1-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; VF8UF1-NEXT: br i1 [[C_1]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP7:![0-9]+]]
; VF8UF1: [[OUTER_LATCH]]:
; VF8UF1-NEXT: [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[OUTER_IV]], i64 1
; VF8UF1-NEXT: [[C_2:%.*]] = call i1 @cond()
; VF8UF1-NEXT: br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: ret void
;
; VF8UF2-LABEL: define void @remove_loop_region_outer_loop(
; VF8UF2-SAME: i64 range(i64 8, 17) [[N:%.*]], ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
; VF8UF2-NEXT: [[ENTRY:.*]]:
; VF8UF2-NEXT: br label %[[OUTER_HEADER:.*]]
; VF8UF2: [[OUTER_HEADER]]:
; VF8UF2-NEXT: [[OUTER_IV:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; VF8UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; VF8UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF8UF2-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[TMP6]]
; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
; VF8UF2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
; VF8UF2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i32 0
; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP3]], i32 8
; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD]], ptr [[TMP4]], align 1
; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD1]], ptr [[TMP5]], align 1
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; VF8UF2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; VF8UF2-NEXT: br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH]]
; VF8UF2: [[SCALAR_PH]]:
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_HEADER]] ]
; VF8UF2-NEXT: br label %[[INNER:.*]]
; VF8UF2: [[INNER]]:
; VF8UF2-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ]
; VF8UF2-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[INNER_IV]]
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
; VF8UF2-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INNER_IV]]
; VF8UF2-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1
; VF8UF2-NEXT: [[IV_NEXT]] = add i64 [[INNER_IV]], 1
; VF8UF2-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; VF8UF2-NEXT: br i1 [[C_1]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP7:![0-9]+]]
; VF8UF2: [[OUTER_LATCH]]:
; VF8UF2-NEXT: [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[OUTER_IV]], i64 1
; VF8UF2-NEXT: [[C_2:%.*]] = call i1 @cond()
; VF8UF2-NEXT: br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: ret void
;
; VF16UF1-LABEL: define void @remove_loop_region_outer_loop(
; VF16UF1-SAME: i64 range(i64 8, 17) [[N:%.*]], ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
; VF16UF1-NEXT: [[ENTRY:.*]]:
; VF16UF1-NEXT: br label %[[OUTER_HEADER:.*]]
; VF16UF1: [[OUTER_HEADER]]:
; VF16UF1-NEXT: [[OUTER_IV:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; VF16UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; VF16UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; VF16UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF16UF1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[TMP4]]
; VF16UF1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
; VF16UF1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0
; VF16UF1-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 1
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; VF16UF1-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; VF16UF1-NEXT: br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH]]
; VF16UF1: [[SCALAR_PH]]:
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_HEADER]] ]
; VF16UF1-NEXT: br label %[[INNER:.*]]
; VF16UF1: [[INNER]]:
; VF16UF1-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ]
; VF16UF1-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[INNER_IV]]
; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
; VF16UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INNER_IV]]
; VF16UF1-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1
; VF16UF1-NEXT: [[IV_NEXT]] = add i64 [[INNER_IV]], 1
; VF16UF1-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; VF16UF1-NEXT: br i1 [[C_1]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP7:![0-9]+]]
; VF16UF1: [[OUTER_LATCH]]:
; VF16UF1-NEXT: [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[OUTER_IV]], i64 1
; VF16UF1-NEXT: [[C_2:%.*]] = call i1 @cond()
; VF16UF1-NEXT: br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: ret void
;
entry:
br label %outer.header
outer.header:
%outer.iv = phi ptr [ %src, %entry ], [ %outer.iv.next, %outer.latch ]
br label %inner
inner:
%inner.iv = phi i64 [ 0, %outer.header ], [ %iv.next, %inner ]
%gep.src = getelementptr i8, ptr %outer.iv, i64 %inner.iv
%l = load i8, ptr %gep.src, align 1
%gep.dst = getelementptr i8, ptr %dst, i64 %inner.iv
store i8 %l, ptr %gep.dst, align 1
%iv.next = add i64 %inner.iv, 1
%c.1 = icmp eq i64 %iv.next, %N
br i1 %c.1, label %outer.latch, label %inner
outer.latch:
%outer.iv.next = getelementptr i8, ptr %outer.iv, i64 1
%c.2 = call i1 @cond()
br i1 %c.2, label %outer.header, label %exit
exit:
ret void
}
;.
; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; VF8UF1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; VF8UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; VF8UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; VF8UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; VF8UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; VF8UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
;.
; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF8UF2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; VF8UF2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; VF8UF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; VF8UF2: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; VF8UF2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; VF8UF2: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
;.
; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF16UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; VF16UF1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; VF16UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; VF16UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; VF16UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; VF16UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}