llvm-project/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
Ramkumar Ramachandra 2d4a8dadba
[VPlan] Use DL index type consistently for GEPs (#169396)
In preparation to strip VPUnrollPartAccessor and unroll recipes
directly, strip unnecessary complication in getGEPIndexTy, as the unroll
part will no longer be available in follow-ups (see #168886 for
instance). The patch also helps by doing a mass test update up-front.
Narrowing the GEP index type conditionally does not yield any benefit,
and the change is non-functional in terms of emitted assembly. While at
it, avoid hard-coding address-space 0, and use the pointer operand's
address space to get the GEP index type.
2025-11-26 12:25:55 +00:00

1234 lines
69 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF8UF1 %s
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=VF8UF2 %s
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=16 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF16UF1 %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; Check if the vector loop condition can be simplified to true for a given
; VF/IC combination.
;
; The loop below counts %iv down from (%N & 15) to zero; every iteration adds
; 10 to the byte at %p.src and advances %p.src by one byte.
; Expected results, per the autogenerated assertions in the function body:
;  * vector width 8, interleave 1 - a vector loop is emitted and keeps its
;    conditional backedge; a scalar loop handles the remaining iterations.
;  * vector width 8, interleave 2 and vector width 16, interleave 1 - only the
;    original scalar loop is matched (no vector blocks are expected).
define void @test_tc_less_than_16(ptr %A, i64 %N) {
; VF8UF1-LABEL: define void @test_tc_less_than_16(
; VF8UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; VF8UF1-NEXT: [[ENTRY:.*]]:
; VF8UF1-NEXT: [[AND:%.*]] = and i64 [[N]], 15
; VF8UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[AND]], 8
; VF8UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[AND]], 8
; VF8UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[AND]], [[N_MOD_VF]]
; VF8UF1-NEXT: [[TMP0:%.*]] = sub i64 [[AND]], [[N_VEC]]
; VF8UF1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF8UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
; VF8UF1-NEXT: [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
; VF8UF1-NEXT: store <8 x i8> [[TMP4]], ptr [[NEXT_GEP]], align 1
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 8
; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF8UF1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]
; VF8UF1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VF8UF1: [[SCALAR_PH]]:
; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[AND]], %[[ENTRY]] ]
; VF8UF1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; VF8UF1-NEXT: br label %[[LOOP:.*]]
; VF8UF1: [[LOOP]]:
; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF8UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
; VF8UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF8UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
; VF8UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 0
; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: ret void
;
; VF8UF2-LABEL: define void @test_tc_less_than_16(
; VF8UF2-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; VF8UF2-NEXT: [[ENTRY:.*]]:
; VF8UF2-NEXT: [[AND:%.*]] = and i64 [[N]], 15
; VF8UF2-NEXT: br label %[[LOOP:.*]]
; VF8UF2: [[LOOP]]:
; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[AND]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF8UF2-NEXT: [[P_SRC:%.*]] = phi ptr [ [[A]], %[[ENTRY]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
; VF8UF2-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF8UF2-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
; VF8UF2-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 0
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: ret void
;
; VF16UF1-LABEL: define void @test_tc_less_than_16(
; VF16UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; VF16UF1-NEXT: [[ENTRY:.*]]:
; VF16UF1-NEXT: [[AND:%.*]] = and i64 [[N]], 15
; VF16UF1-NEXT: br label %[[LOOP:.*]]
; VF16UF1: [[LOOP]]:
; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[AND]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF16UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[A]], %[[ENTRY]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
; VF16UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF16UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
; VF16UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 0
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: ret void
;
entry:
; Mask the starting counter so that it lies in 0..15.
%and = and i64 %N, 15
br label %loop
loop:
; %iv counts down towards zero while %p.src walks forward one byte at a time.
%iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ]
%p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
%p.src.next = getelementptr inbounds i8, ptr %p.src, i64 1
%l = load i8, ptr %p.src, align 1
%add = add nsw i8 %l, 10
; No explicit alignment on this store; the assertions above expect the
; printed output to carry "align 1".
store i8 %add, ptr %p.src
%iv.next = add nsw i64 %iv, -1
; Leave the loop once the decremented counter reaches zero.
%cmp = icmp eq i64 %iv.next, 0
br i1 %cmp, label %exit, label %loop
exit:
ret void
}
; Store loop that writes i16 0 to %dst[2 .. %N - 1]; the range attribute on %N
; limits it to 5..9.
; For every tested configuration the autogenerated assertions expect:
;  * an unconditional branch from the entry block to the vector preheader,
;  * a vector body without a backedge (it branches straight to the middle
;    block, which branches to the exit), and no scalar loop,
;  * one conditionally executed scalar store per lane, guarded by comparing
;    constant lane indices against a splat of (trip count - 1).
define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, 10) %N) {
; VF8UF1-LABEL: define void @remove_loop_region_with_replicate_recipe(
; VF8UF1-SAME: ptr [[DST:%.*]], i64 range(i64 5, 10) [[N:%.*]]) {
; VF8UF1-NEXT: [[ENTRY:.*:]]
; VF8UF1-NEXT: [[TMP0:%.*]] = add nsw i64 [[N]], -2
; VF8UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
; VF8UF1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; VF8UF1-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[TMP2:%.*]] = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[BROADCAST_SPLAT1]]
; VF8UF1-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP2]], i32 0
; VF8UF1-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF1: [[PRED_STORE_IF]]:
; VF8UF1-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 2
; VF8UF1-NEXT: store i16 0, ptr [[TMP4]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF8UF1: [[PRED_STORE_CONTINUE]]:
; VF8UF1-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP2]], i32 1
; VF8UF1-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF8UF1: [[PRED_STORE_IF1]]:
; VF8UF1-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[DST]], i64 3
; VF8UF1-NEXT: store i16 0, ptr [[TMP6]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; VF8UF1: [[PRED_STORE_CONTINUE2]]:
; VF8UF1-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP2]], i32 2
; VF8UF1-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF8UF1: [[PRED_STORE_IF3]]:
; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 4
; VF8UF1-NEXT: store i16 0, ptr [[TMP8]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; VF8UF1: [[PRED_STORE_CONTINUE4]]:
; VF8UF1-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP2]], i32 3
; VF8UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF8UF1: [[PRED_STORE_IF5]]:
; VF8UF1-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 5
; VF8UF1-NEXT: store i16 0, ptr [[TMP10]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; VF8UF1: [[PRED_STORE_CONTINUE6]]:
; VF8UF1-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP2]], i32 4
; VF8UF1-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF8UF1: [[PRED_STORE_IF7]]:
; VF8UF1-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[DST]], i64 6
; VF8UF1-NEXT: store i16 0, ptr [[TMP12]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; VF8UF1: [[PRED_STORE_CONTINUE8]]:
; VF8UF1-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP2]], i32 5
; VF8UF1-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF8UF1: [[PRED_STORE_IF9]]:
; VF8UF1-NEXT: [[TMP14:%.*]] = getelementptr i16, ptr [[DST]], i64 7
; VF8UF1-NEXT: store i16 0, ptr [[TMP14]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; VF8UF1: [[PRED_STORE_CONTINUE10]]:
; VF8UF1-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP2]], i32 6
; VF8UF1-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF8UF1: [[PRED_STORE_IF11]]:
; VF8UF1-NEXT: [[TMP16:%.*]] = getelementptr i16, ptr [[DST]], i64 8
; VF8UF1-NEXT: store i16 0, ptr [[TMP16]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; VF8UF1: [[PRED_STORE_CONTINUE12]]:
; VF8UF1-NEXT: [[TMP17:%.*]] = extractelement <8 x i1> [[TMP2]], i32 7
; VF8UF1-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF8UF1: [[PRED_STORE_IF13]]:
; VF8UF1-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[DST]], i64 9
; VF8UF1-NEXT: store i16 0, ptr [[TMP18]], align 2
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
; VF8UF1: [[PRED_STORE_CONTINUE14]]:
; VF8UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: br label %[[EXIT:.*]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: ret void
;
; VF8UF2-LABEL: define void @remove_loop_region_with_replicate_recipe(
; VF8UF2-SAME: ptr [[DST:%.*]], i64 range(i64 5, 10) [[N:%.*]]) {
; VF8UF2-NEXT: [[ENTRY:.*:]]
; VF8UF2-NEXT: [[TMP0:%.*]] = add nsw i64 [[N]], -2
; VF8UF2-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
; VF8UF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; VF8UF2-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[TMP2:%.*]] = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[BROADCAST_SPLAT1]]
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp ule <8 x i64> <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BROADCAST_SPLAT1]]
; VF8UF2-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP2]], i32 0
; VF8UF2-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF2: [[PRED_STORE_IF]]:
; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[DST]], i64 2
; VF8UF2-NEXT: store i16 0, ptr [[TMP5]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF8UF2: [[PRED_STORE_CONTINUE]]:
; VF8UF2-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP2]], i32 1
; VF8UF2-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF8UF2: [[PRED_STORE_IF1]]:
; VF8UF2-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[DST]], i64 3
; VF8UF2-NEXT: store i16 0, ptr [[TMP7]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; VF8UF2: [[PRED_STORE_CONTINUE2]]:
; VF8UF2-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP2]], i32 2
; VF8UF2-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF8UF2: [[PRED_STORE_IF3]]:
; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[DST]], i64 4
; VF8UF2-NEXT: store i16 0, ptr [[TMP9]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; VF8UF2: [[PRED_STORE_CONTINUE4]]:
; VF8UF2-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP2]], i32 3
; VF8UF2-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF8UF2: [[PRED_STORE_IF5]]:
; VF8UF2-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 5
; VF8UF2-NEXT: store i16 0, ptr [[TMP11]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; VF8UF2: [[PRED_STORE_CONTINUE6]]:
; VF8UF2-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP2]], i32 4
; VF8UF2-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF8UF2: [[PRED_STORE_IF7]]:
; VF8UF2-NEXT: [[TMP13:%.*]] = getelementptr i16, ptr [[DST]], i64 6
; VF8UF2-NEXT: store i16 0, ptr [[TMP13]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; VF8UF2: [[PRED_STORE_CONTINUE8]]:
; VF8UF2-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP2]], i32 5
; VF8UF2-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF8UF2: [[PRED_STORE_IF9]]:
; VF8UF2-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[DST]], i64 7
; VF8UF2-NEXT: store i16 0, ptr [[TMP15]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; VF8UF2: [[PRED_STORE_CONTINUE10]]:
; VF8UF2-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP2]], i32 6
; VF8UF2-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF8UF2: [[PRED_STORE_IF11]]:
; VF8UF2-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[DST]], i64 8
; VF8UF2-NEXT: store i16 0, ptr [[TMP17]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; VF8UF2: [[PRED_STORE_CONTINUE12]]:
; VF8UF2-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[TMP2]], i32 7
; VF8UF2-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF8UF2: [[PRED_STORE_IF13]]:
; VF8UF2-NEXT: [[TMP19:%.*]] = getelementptr i16, ptr [[DST]], i64 9
; VF8UF2-NEXT: store i16 0, ptr [[TMP19]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE14]]
; VF8UF2: [[PRED_STORE_CONTINUE14]]:
; VF8UF2-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF2-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; VF8UF2: [[PRED_STORE_IF15]]:
; VF8UF2-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[DST]], i64 10
; VF8UF2-NEXT: store i16 0, ptr [[TMP21]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE16]]
; VF8UF2: [[PRED_STORE_CONTINUE16]]:
; VF8UF2-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; VF8UF2-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; VF8UF2: [[PRED_STORE_IF17]]:
; VF8UF2-NEXT: [[TMP23:%.*]] = getelementptr i16, ptr [[DST]], i64 11
; VF8UF2-NEXT: store i16 0, ptr [[TMP23]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE18]]
; VF8UF2: [[PRED_STORE_CONTINUE18]]:
; VF8UF2-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
; VF8UF2-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; VF8UF2: [[PRED_STORE_IF19]]:
; VF8UF2-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[DST]], i64 12
; VF8UF2-NEXT: store i16 0, ptr [[TMP25]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE20]]
; VF8UF2: [[PRED_STORE_CONTINUE20]]:
; VF8UF2-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
; VF8UF2-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; VF8UF2: [[PRED_STORE_IF21]]:
; VF8UF2-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[DST]], i64 13
; VF8UF2-NEXT: store i16 0, ptr [[TMP27]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE22]]
; VF8UF2: [[PRED_STORE_CONTINUE22]]:
; VF8UF2-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
; VF8UF2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; VF8UF2: [[PRED_STORE_IF23]]:
; VF8UF2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DST]], i64 14
; VF8UF2-NEXT: store i16 0, ptr [[TMP29]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE24]]
; VF8UF2: [[PRED_STORE_CONTINUE24]]:
; VF8UF2-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
; VF8UF2-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; VF8UF2: [[PRED_STORE_IF25]]:
; VF8UF2-NEXT: [[TMP31:%.*]] = getelementptr i16, ptr [[DST]], i64 15
; VF8UF2-NEXT: store i16 0, ptr [[TMP31]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE26]]
; VF8UF2: [[PRED_STORE_CONTINUE26]]:
; VF8UF2-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
; VF8UF2-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; VF8UF2: [[PRED_STORE_IF27]]:
; VF8UF2-NEXT: [[TMP33:%.*]] = getelementptr i16, ptr [[DST]], i64 16
; VF8UF2-NEXT: store i16 0, ptr [[TMP33]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE28]]
; VF8UF2: [[PRED_STORE_CONTINUE28]]:
; VF8UF2-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF2-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; VF8UF2: [[PRED_STORE_IF29]]:
; VF8UF2-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DST]], i64 17
; VF8UF2-NEXT: store i16 0, ptr [[TMP35]], align 2
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE30]]
; VF8UF2: [[PRED_STORE_CONTINUE30]]:
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: br label %[[EXIT:.*]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: ret void
;
; VF16UF1-LABEL: define void @remove_loop_region_with_replicate_recipe(
; VF16UF1-SAME: ptr [[DST:%.*]], i64 range(i64 5, 10) [[N:%.*]]) {
; VF16UF1-NEXT: [[ENTRY:.*:]]
; VF16UF1-NEXT: [[TMP0:%.*]] = add nsw i64 [[N]], -2
; VF16UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
; VF16UF1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; VF16UF1-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
; VF16UF1-NEXT: [[TMP2:%.*]] = icmp ule <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BROADCAST_SPLAT1]]
; VF16UF1-NEXT: [[TMP3:%.*]] = extractelement <16 x i1> [[TMP2]], i32 0
; VF16UF1-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF16UF1: [[PRED_STORE_IF]]:
; VF16UF1-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 2
; VF16UF1-NEXT: store i16 0, ptr [[TMP4]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF16UF1: [[PRED_STORE_CONTINUE]]:
; VF16UF1-NEXT: [[TMP5:%.*]] = extractelement <16 x i1> [[TMP2]], i32 1
; VF16UF1-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF16UF1: [[PRED_STORE_IF1]]:
; VF16UF1-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[DST]], i64 3
; VF16UF1-NEXT: store i16 0, ptr [[TMP6]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; VF16UF1: [[PRED_STORE_CONTINUE2]]:
; VF16UF1-NEXT: [[TMP7:%.*]] = extractelement <16 x i1> [[TMP2]], i32 2
; VF16UF1-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF16UF1: [[PRED_STORE_IF3]]:
; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 4
; VF16UF1-NEXT: store i16 0, ptr [[TMP8]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; VF16UF1: [[PRED_STORE_CONTINUE4]]:
; VF16UF1-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP2]], i32 3
; VF16UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF16UF1: [[PRED_STORE_IF5]]:
; VF16UF1-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 5
; VF16UF1-NEXT: store i16 0, ptr [[TMP10]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; VF16UF1: [[PRED_STORE_CONTINUE6]]:
; VF16UF1-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP2]], i32 4
; VF16UF1-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF16UF1: [[PRED_STORE_IF7]]:
; VF16UF1-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[DST]], i64 6
; VF16UF1-NEXT: store i16 0, ptr [[TMP12]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; VF16UF1: [[PRED_STORE_CONTINUE8]]:
; VF16UF1-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP2]], i32 5
; VF16UF1-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF16UF1: [[PRED_STORE_IF9]]:
; VF16UF1-NEXT: [[TMP14:%.*]] = getelementptr i16, ptr [[DST]], i64 7
; VF16UF1-NEXT: store i16 0, ptr [[TMP14]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; VF16UF1: [[PRED_STORE_CONTINUE10]]:
; VF16UF1-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP2]], i32 6
; VF16UF1-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF16UF1: [[PRED_STORE_IF11]]:
; VF16UF1-NEXT: [[TMP16:%.*]] = getelementptr i16, ptr [[DST]], i64 8
; VF16UF1-NEXT: store i16 0, ptr [[TMP16]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; VF16UF1: [[PRED_STORE_CONTINUE12]]:
; VF16UF1-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP2]], i32 7
; VF16UF1-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF16UF1: [[PRED_STORE_IF13]]:
; VF16UF1-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[DST]], i64 9
; VF16UF1-NEXT: store i16 0, ptr [[TMP18]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
; VF16UF1: [[PRED_STORE_CONTINUE14]]:
; VF16UF1-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP2]], i32 8
; VF16UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; VF16UF1: [[PRED_STORE_IF15]]:
; VF16UF1-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[DST]], i64 10
; VF16UF1-NEXT: store i16 0, ptr [[TMP20]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE16]]
; VF16UF1: [[PRED_STORE_CONTINUE16]]:
; VF16UF1-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP2]], i32 9
; VF16UF1-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; VF16UF1: [[PRED_STORE_IF17]]:
; VF16UF1-NEXT: [[TMP22:%.*]] = getelementptr i16, ptr [[DST]], i64 11
; VF16UF1-NEXT: store i16 0, ptr [[TMP22]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE18]]
; VF16UF1: [[PRED_STORE_CONTINUE18]]:
; VF16UF1-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP2]], i32 10
; VF16UF1-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; VF16UF1: [[PRED_STORE_IF19]]:
; VF16UF1-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[DST]], i64 12
; VF16UF1-NEXT: store i16 0, ptr [[TMP24]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE20]]
; VF16UF1: [[PRED_STORE_CONTINUE20]]:
; VF16UF1-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP2]], i32 11
; VF16UF1-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; VF16UF1: [[PRED_STORE_IF21]]:
; VF16UF1-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[DST]], i64 13
; VF16UF1-NEXT: store i16 0, ptr [[TMP26]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE22]]
; VF16UF1: [[PRED_STORE_CONTINUE22]]:
; VF16UF1-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP2]], i32 12
; VF16UF1-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; VF16UF1: [[PRED_STORE_IF23]]:
; VF16UF1-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[DST]], i64 14
; VF16UF1-NEXT: store i16 0, ptr [[TMP28]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE24]]
; VF16UF1: [[PRED_STORE_CONTINUE24]]:
; VF16UF1-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP2]], i32 13
; VF16UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; VF16UF1: [[PRED_STORE_IF25]]:
; VF16UF1-NEXT: [[TMP30:%.*]] = getelementptr i16, ptr [[DST]], i64 15
; VF16UF1-NEXT: store i16 0, ptr [[TMP30]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE26]]
; VF16UF1: [[PRED_STORE_CONTINUE26]]:
; VF16UF1-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP2]], i32 14
; VF16UF1-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; VF16UF1: [[PRED_STORE_IF27]]:
; VF16UF1-NEXT: [[TMP32:%.*]] = getelementptr i16, ptr [[DST]], i64 16
; VF16UF1-NEXT: store i16 0, ptr [[TMP32]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE28]]
; VF16UF1: [[PRED_STORE_CONTINUE28]]:
; VF16UF1-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP2]], i32 15
; VF16UF1-NEXT: br i1 [[TMP33]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; VF16UF1: [[PRED_STORE_IF29]]:
; VF16UF1-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[DST]], i64 17
; VF16UF1-NEXT: store i16 0, ptr [[TMP34]], align 2
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE30]]
; VF16UF1: [[PRED_STORE_CONTINUE30]]:
; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: br label %[[EXIT:.*]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: ret void
;
entry:
br label %loop
loop:
; The induction variable starts at 2, so the first element written is %dst[2].
%iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ]
%gep.dst = getelementptr i16, ptr %dst, i64 %iv
store i16 0, ptr %gep.dst, align 2
%iv.next = add i64 %iv, 1
; Stop once the incremented induction variable equals %N.
%ec = icmp eq i64 %iv.next, %N
br i1 %ec, label %exit, label %loop
exit:
ret void
}
declare i1 @cond()
; Nested loops: the inner loop copies %N bytes from %outer.iv to %dst (the
; range attribute limits %N to 8..16); the outer latch advances %outer.iv by
; one byte and repeats for as long as @cond() returns true.
; Expected results, per the autogenerated assertions in the function body:
;  * vector width 8, interleave 1 - the outer header branches directly to the
;    vector preheader and the inner vector loop keeps its conditional backedge.
;  * vector width 8, interleave 2 and vector width 16, interleave 1 - a
;    minimum-iteration check (%N < 16) guards a straight-line vector body that
;    branches unconditionally to the middle block; the scalar inner loop
;    handles whatever iterations remain.
define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias %src, ptr %dst) {
; VF8UF1-LABEL: define void @remove_loop_region_outer_loop(
; VF8UF1-SAME: i64 range(i64 8, 17) [[N:%.*]], ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
; VF8UF1-NEXT: [[ENTRY:.*]]:
; VF8UF1-NEXT: br label %[[OUTER_HEADER:.*]]
; VF8UF1: [[OUTER_HEADER]]:
; VF8UF1-NEXT: [[OUTER_IV:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; VF8UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; VF8UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF8UF1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[TMP0]]
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; VF8UF1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
; VF8UF1-NEXT: store <8 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 1
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 8
; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF8UF1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; VF8UF1-NEXT: br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH:.*]]
; VF8UF1: [[SCALAR_PH]]:
; VF8UF1-NEXT: br label %[[INNER:.*]]
; VF8UF1: [[INNER]]:
; VF8UF1-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[N_VEC]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ]
; VF8UF1-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[INNER_IV]]
; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
; VF8UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INNER_IV]]
; VF8UF1-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1
; VF8UF1-NEXT: [[IV_NEXT]] = add i64 [[INNER_IV]], 1
; VF8UF1-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; VF8UF1-NEXT: br i1 [[C_1]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP5:![0-9]+]]
; VF8UF1: [[OUTER_LATCH]]:
; VF8UF1-NEXT: [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[OUTER_IV]], i64 1
; VF8UF1-NEXT: [[C_2:%.*]] = call i1 @cond()
; VF8UF1-NEXT: br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: ret void
;
; VF8UF2-LABEL: define void @remove_loop_region_outer_loop(
; VF8UF2-SAME: i64 range(i64 8, 17) [[N:%.*]], ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
; VF8UF2-NEXT: [[ENTRY:.*]]:
; VF8UF2-NEXT: br label %[[OUTER_HEADER:.*]]
; VF8UF2: [[OUTER_HEADER]]:
; VF8UF2-NEXT: [[TMP0:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; VF8UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; VF8UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 8
; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD]], ptr [[DST]], align 1
; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD1]], ptr [[TMP5]], align 1
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; VF8UF2-NEXT: br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH]]
; VF8UF2: [[SCALAR_PH]]:
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_HEADER]] ]
; VF8UF2-NEXT: br label %[[INNER:.*]]
; VF8UF2: [[INNER]]:
; VF8UF2-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ]
; VF8UF2-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[INNER_IV]]
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
; VF8UF2-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INNER_IV]]
; VF8UF2-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1
; VF8UF2-NEXT: [[IV_NEXT]] = add i64 [[INNER_IV]], 1
; VF8UF2-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; VF8UF2-NEXT: br i1 [[C_1]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP0:![0-9]+]]
; VF8UF2: [[OUTER_LATCH]]:
; VF8UF2-NEXT: [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[TMP0]], i64 1
; VF8UF2-NEXT: [[C_2:%.*]] = call i1 @cond()
; VF8UF2-NEXT: br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: ret void
;
; VF16UF1-LABEL: define void @remove_loop_region_outer_loop(
; VF16UF1-SAME: i64 range(i64 8, 17) [[N:%.*]], ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
; VF16UF1-NEXT: [[ENTRY:.*]]:
; VF16UF1-NEXT: br label %[[OUTER_HEADER:.*]]
; VF16UF1: [[OUTER_HEADER]]:
; VF16UF1-NEXT: [[TMP1:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; VF16UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; VF16UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; VF16UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
; VF16UF1-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[DST]], align 1
; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; VF16UF1-NEXT: br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH]]
; VF16UF1: [[SCALAR_PH]]:
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_HEADER]] ]
; VF16UF1-NEXT: br label %[[INNER:.*]]
; VF16UF1: [[INNER]]:
; VF16UF1-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ]
; VF16UF1-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[INNER_IV]]
; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
; VF16UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INNER_IV]]
; VF16UF1-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1
; VF16UF1-NEXT: [[IV_NEXT]] = add i64 [[INNER_IV]], 1
; VF16UF1-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; VF16UF1-NEXT: br i1 [[C_1]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP0:![0-9]+]]
; VF16UF1: [[OUTER_LATCH]]:
; VF16UF1-NEXT: [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[TMP1]], i64 1
; VF16UF1-NEXT: [[C_2:%.*]] = call i1 @cond()
; VF16UF1-NEXT: br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: ret void
;
entry:
br label %outer.header
outer.header:
; %outer.iv is the source pointer for the current outer iteration.
%outer.iv = phi ptr [ %src, %entry ], [ %outer.iv.next, %outer.latch ]
br label %inner
inner:
; Byte-wise copy from %outer.iv[%inner.iv] to %dst[%inner.iv], for
; %inner.iv = 0 .. %N - 1.
%inner.iv = phi i64 [ 0, %outer.header ], [ %iv.next, %inner ]
%gep.src = getelementptr i8, ptr %outer.iv, i64 %inner.iv
%l = load i8, ptr %gep.src, align 1
%gep.dst = getelementptr i8, ptr %dst, i64 %inner.iv
store i8 %l, ptr %gep.dst, align 1
%iv.next = add i64 %inner.iv, 1
%c.1 = icmp eq i64 %iv.next, %N
br i1 %c.1, label %outer.latch, label %inner
outer.latch:
; Advance the source pointer by one byte; @cond() decides whether the outer
; loop runs again.
%outer.iv.next = getelementptr i8, ptr %outer.iv, i64 1
%c.2 = call i1 @cond()
br i1 %c.2, label %outer.header, label %exit
exit:
ret void
}
declare void @llvm.assume(i1)
; Test case for https://github.com/llvm/llvm-project/issues/121897.
; The loop below advances %iv by %step each iteration, stores a zero byte at
; %dst + %iv.next, and keeps iterating while %iv.next is (signed) less than 16.
; %step is computed from a frozen copy of %x; the llvm.assume states that
; %x == 65536, under which %step would be 2.
;
; For all three checked VF/interleave configurations the expected output:
;  * computes the trip count in the entry block as (15 udiv %step) + 1,
;  * has a vector body with no induction phi and no backedge (it branches
;    straight to the middle block),
;  * emits one predicated store per lane, guarded by a mask comparing the
;    constant lane indices (ule) against the broadcast trip-count-minus-1,
;  * has no scalar remainder loop (the middle block branches to the exit).
define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-LABEL: define void @scev_expand_step(
; VF8UF1-SAME: i64 [[X:%.*]], ptr [[DST:%.*]]) {
; VF8UF1-NEXT: [[ENTRY:.*:]]
; VF8UF1-NEXT: [[C:%.*]] = icmp eq i64 [[X]], 65536
; VF8UF1-NEXT: call void @llvm.assume(i1 [[C]])
; VF8UF1-NEXT: [[FR:%.*]] = freeze i64 [[X]]
; VF8UF1-NEXT: [[STEP:%.*]] = add i64 [[FR]], -65534
; VF8UF1-NEXT: [[TMP0:%.*]] = udiv i64 15, [[STEP]]
; VF8UF1-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
; VF8UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1
; VF8UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; VF8UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[TMP3:%.*]] = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[BROADCAST_SPLAT]]
; VF8UF1-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF1: [[PRED_STORE_IF]]:
; VF8UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
; VF8UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
; VF8UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF8UF1: [[PRED_STORE_CONTINUE]]:
; VF8UF1-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; VF8UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF8UF1: [[PRED_STORE_IF1]]:
; VF8UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[STEP]]
; VF8UF1-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
; VF8UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
; VF8UF1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP13]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; VF8UF1: [[PRED_STORE_CONTINUE2]]:
; VF8UF1-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
; VF8UF1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF8UF1: [[PRED_STORE_IF3]]:
; VF8UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[STEP]]
; VF8UF1-NEXT: [[TMP16:%.*]] = add i64 0, [[TMP15]]
; VF8UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
; VF8UF1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP18]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; VF8UF1: [[PRED_STORE_CONTINUE4]]:
; VF8UF1-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
; VF8UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF8UF1: [[PRED_STORE_IF5]]:
; VF8UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[STEP]]
; VF8UF1-NEXT: [[TMP21:%.*]] = add i64 0, [[TMP20]]
; VF8UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
; VF8UF1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP23]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; VF8UF1: [[PRED_STORE_CONTINUE6]]:
; VF8UF1-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
; VF8UF1-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF8UF1: [[PRED_STORE_IF7]]:
; VF8UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[STEP]]
; VF8UF1-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]]
; VF8UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
; VF8UF1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP28]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; VF8UF1: [[PRED_STORE_CONTINUE8]]:
; VF8UF1-NEXT: [[TMP29:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
; VF8UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF8UF1: [[PRED_STORE_IF9]]:
; VF8UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[STEP]]
; VF8UF1-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]]
; VF8UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
; VF8UF1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP33]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; VF8UF1: [[PRED_STORE_CONTINUE10]]:
; VF8UF1-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
; VF8UF1-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF8UF1: [[PRED_STORE_IF11]]:
; VF8UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[STEP]]
; VF8UF1-NEXT: [[TMP36:%.*]] = add i64 0, [[TMP35]]
; VF8UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
; VF8UF1-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP38]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; VF8UF1: [[PRED_STORE_CONTINUE12]]:
; VF8UF1-NEXT: [[TMP39:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF1-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF8UF1: [[PRED_STORE_IF13]]:
; VF8UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[STEP]]
; VF8UF1-NEXT: [[TMP41:%.*]] = add i64 0, [[TMP40]]
; VF8UF1-NEXT: [[IV_NEXT:%.*]] = add i64 [[TMP41]], [[STEP]]
; VF8UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_NEXT]]
; VF8UF1-NEXT: store i8 0, ptr [[GEP_DST]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
; VF8UF1: [[PRED_STORE_CONTINUE14]]:
; VF8UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: br label %[[EXIT:.*]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: ret void
;
; VF8UF2-LABEL: define void @scev_expand_step(
; VF8UF2-SAME: i64 [[X:%.*]], ptr [[DST:%.*]]) {
; VF8UF2-NEXT: [[ENTRY:.*:]]
; VF8UF2-NEXT: [[C:%.*]] = icmp eq i64 [[X]], 65536
; VF8UF2-NEXT: call void @llvm.assume(i1 [[C]])
; VF8UF2-NEXT: [[FR:%.*]] = freeze i64 [[X]]
; VF8UF2-NEXT: [[STEP:%.*]] = add i64 [[FR]], -65534
; VF8UF2-NEXT: [[TMP0:%.*]] = udiv i64 15, [[STEP]]
; VF8UF2-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
; VF8UF2-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1
; VF8UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; VF8UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[BROADCAST_SPLAT]]
; VF8UF2-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BROADCAST_SPLAT]]
; VF8UF2-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF2-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF2: [[PRED_STORE_IF]]:
; VF8UF2-NEXT: [[TMP6:%.*]] = mul i64 0, [[STEP]]
; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 0, [[TMP6]]
; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[STEP]]
; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP9]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF8UF2: [[PRED_STORE_CONTINUE]]:
; VF8UF2-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; VF8UF2-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF8UF2: [[PRED_STORE_IF1]]:
; VF8UF2-NEXT: [[TMP11:%.*]] = mul i64 1, [[STEP]]
; VF8UF2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]]
; VF8UF2-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[STEP]]
; VF8UF2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP14]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; VF8UF2: [[PRED_STORE_CONTINUE2]]:
; VF8UF2-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
; VF8UF2-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF8UF2: [[PRED_STORE_IF3]]:
; VF8UF2-NEXT: [[TMP16:%.*]] = mul i64 2, [[STEP]]
; VF8UF2-NEXT: [[TMP17:%.*]] = add i64 0, [[TMP16]]
; VF8UF2-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], [[STEP]]
; VF8UF2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP18]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP19]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; VF8UF2: [[PRED_STORE_CONTINUE4]]:
; VF8UF2-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
; VF8UF2-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF8UF2: [[PRED_STORE_IF5]]:
; VF8UF2-NEXT: [[TMP21:%.*]] = mul i64 3, [[STEP]]
; VF8UF2-NEXT: [[TMP22:%.*]] = add i64 0, [[TMP21]]
; VF8UF2-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[STEP]]
; VF8UF2-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP24]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; VF8UF2: [[PRED_STORE_CONTINUE6]]:
; VF8UF2-NEXT: [[TMP25:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
; VF8UF2-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF8UF2: [[PRED_STORE_IF7]]:
; VF8UF2-NEXT: [[TMP26:%.*]] = mul i64 4, [[STEP]]
; VF8UF2-NEXT: [[TMP27:%.*]] = add i64 0, [[TMP26]]
; VF8UF2-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], [[STEP]]
; VF8UF2-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP28]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP29]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; VF8UF2: [[PRED_STORE_CONTINUE8]]:
; VF8UF2-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
; VF8UF2-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF8UF2: [[PRED_STORE_IF9]]:
; VF8UF2-NEXT: [[TMP31:%.*]] = mul i64 5, [[STEP]]
; VF8UF2-NEXT: [[TMP32:%.*]] = add i64 0, [[TMP31]]
; VF8UF2-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], [[STEP]]
; VF8UF2-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP33]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP34]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; VF8UF2: [[PRED_STORE_CONTINUE10]]:
; VF8UF2-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
; VF8UF2-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF8UF2: [[PRED_STORE_IF11]]:
; VF8UF2-NEXT: [[TMP36:%.*]] = mul i64 6, [[STEP]]
; VF8UF2-NEXT: [[TMP37:%.*]] = add i64 0, [[TMP36]]
; VF8UF2-NEXT: [[TMP38:%.*]] = add i64 [[TMP37]], [[STEP]]
; VF8UF2-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP38]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP39]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; VF8UF2: [[PRED_STORE_CONTINUE12]]:
; VF8UF2-NEXT: [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF2-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF8UF2: [[PRED_STORE_IF13]]:
; VF8UF2-NEXT: [[TMP41:%.*]] = mul i64 7, [[STEP]]
; VF8UF2-NEXT: [[TMP42:%.*]] = add i64 0, [[TMP41]]
; VF8UF2-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[STEP]]
; VF8UF2-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP43]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP44]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE14]]
; VF8UF2: [[PRED_STORE_CONTINUE14]]:
; VF8UF2-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP4]], i32 0
; VF8UF2-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; VF8UF2: [[PRED_STORE_IF15]]:
; VF8UF2-NEXT: [[TMP46:%.*]] = mul i64 8, [[STEP]]
; VF8UF2-NEXT: [[TMP47:%.*]] = add i64 0, [[TMP46]]
; VF8UF2-NEXT: [[TMP48:%.*]] = add i64 [[TMP47]], [[STEP]]
; VF8UF2-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP48]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP49]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE16]]
; VF8UF2: [[PRED_STORE_CONTINUE16]]:
; VF8UF2-NEXT: [[TMP50:%.*]] = extractelement <8 x i1> [[TMP4]], i32 1
; VF8UF2-NEXT: br i1 [[TMP50]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; VF8UF2: [[PRED_STORE_IF17]]:
; VF8UF2-NEXT: [[TMP51:%.*]] = mul i64 9, [[STEP]]
; VF8UF2-NEXT: [[TMP52:%.*]] = add i64 0, [[TMP51]]
; VF8UF2-NEXT: [[TMP53:%.*]] = add i64 [[TMP52]], [[STEP]]
; VF8UF2-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP53]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP54]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE18]]
; VF8UF2: [[PRED_STORE_CONTINUE18]]:
; VF8UF2-NEXT: [[TMP55:%.*]] = extractelement <8 x i1> [[TMP4]], i32 2
; VF8UF2-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; VF8UF2: [[PRED_STORE_IF19]]:
; VF8UF2-NEXT: [[TMP56:%.*]] = mul i64 10, [[STEP]]
; VF8UF2-NEXT: [[TMP57:%.*]] = add i64 0, [[TMP56]]
; VF8UF2-NEXT: [[TMP58:%.*]] = add i64 [[TMP57]], [[STEP]]
; VF8UF2-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP58]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP59]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE20]]
; VF8UF2: [[PRED_STORE_CONTINUE20]]:
; VF8UF2-NEXT: [[TMP60:%.*]] = extractelement <8 x i1> [[TMP4]], i32 3
; VF8UF2-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; VF8UF2: [[PRED_STORE_IF21]]:
; VF8UF2-NEXT: [[TMP61:%.*]] = mul i64 11, [[STEP]]
; VF8UF2-NEXT: [[TMP62:%.*]] = add i64 0, [[TMP61]]
; VF8UF2-NEXT: [[TMP63:%.*]] = add i64 [[TMP62]], [[STEP]]
; VF8UF2-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP63]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP64]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE22]]
; VF8UF2: [[PRED_STORE_CONTINUE22]]:
; VF8UF2-NEXT: [[TMP65:%.*]] = extractelement <8 x i1> [[TMP4]], i32 4
; VF8UF2-NEXT: br i1 [[TMP65]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; VF8UF2: [[PRED_STORE_IF23]]:
; VF8UF2-NEXT: [[TMP66:%.*]] = mul i64 12, [[STEP]]
; VF8UF2-NEXT: [[TMP67:%.*]] = add i64 0, [[TMP66]]
; VF8UF2-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], [[STEP]]
; VF8UF2-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP68]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP69]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE24]]
; VF8UF2: [[PRED_STORE_CONTINUE24]]:
; VF8UF2-NEXT: [[TMP70:%.*]] = extractelement <8 x i1> [[TMP4]], i32 5
; VF8UF2-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; VF8UF2: [[PRED_STORE_IF25]]:
; VF8UF2-NEXT: [[TMP71:%.*]] = mul i64 13, [[STEP]]
; VF8UF2-NEXT: [[TMP72:%.*]] = add i64 0, [[TMP71]]
; VF8UF2-NEXT: [[TMP73:%.*]] = add i64 [[TMP72]], [[STEP]]
; VF8UF2-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP73]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP74]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE26]]
; VF8UF2: [[PRED_STORE_CONTINUE26]]:
; VF8UF2-NEXT: [[TMP75:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
; VF8UF2-NEXT: br i1 [[TMP75]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; VF8UF2: [[PRED_STORE_IF27]]:
; VF8UF2-NEXT: [[TMP76:%.*]] = mul i64 14, [[STEP]]
; VF8UF2-NEXT: [[TMP77:%.*]] = add i64 0, [[TMP76]]
; VF8UF2-NEXT: [[TMP78:%.*]] = add i64 [[TMP77]], [[STEP]]
; VF8UF2-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP79]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE28]]
; VF8UF2: [[PRED_STORE_CONTINUE28]]:
; VF8UF2-NEXT: [[TMP80:%.*]] = extractelement <8 x i1> [[TMP4]], i32 7
; VF8UF2-NEXT: br i1 [[TMP80]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; VF8UF2: [[PRED_STORE_IF29]]:
; VF8UF2-NEXT: [[TMP81:%.*]] = mul i64 15, [[STEP]]
; VF8UF2-NEXT: [[TMP82:%.*]] = add i64 0, [[TMP81]]
; VF8UF2-NEXT: [[TMP83:%.*]] = add i64 [[TMP82]], [[STEP]]
; VF8UF2-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP83]]
; VF8UF2-NEXT: store i8 0, ptr [[GEP_DST]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE30]]
; VF8UF2: [[PRED_STORE_CONTINUE30]]:
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: br label %[[EXIT:.*]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: ret void
;
; VF16UF1-LABEL: define void @scev_expand_step(
; VF16UF1-SAME: i64 [[X:%.*]], ptr [[DST:%.*]]) {
; VF16UF1-NEXT: [[ENTRY:.*:]]
; VF16UF1-NEXT: [[C:%.*]] = icmp eq i64 [[X]], 65536
; VF16UF1-NEXT: call void @llvm.assume(i1 [[C]])
; VF16UF1-NEXT: [[FR:%.*]] = freeze i64 [[X]]
; VF16UF1-NEXT: [[STEP:%.*]] = add i64 [[FR]], -65534
; VF16UF1-NEXT: [[TMP0:%.*]] = udiv i64 15, [[STEP]]
; VF16UF1-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
; VF16UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1
; VF16UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; VF16UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp ule <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BROADCAST_SPLAT]]
; VF16UF1-NEXT: [[TMP4:%.*]] = extractelement <16 x i1> [[TMP3]], i32 0
; VF16UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF16UF1: [[PRED_STORE_IF]]:
; VF16UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
; VF16UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
; VF16UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF16UF1: [[PRED_STORE_CONTINUE]]:
; VF16UF1-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP3]], i32 1
; VF16UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF16UF1: [[PRED_STORE_IF1]]:
; VF16UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[STEP]]
; VF16UF1-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
; VF16UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
; VF16UF1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP13]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; VF16UF1: [[PRED_STORE_CONTINUE2]]:
; VF16UF1-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP3]], i32 2
; VF16UF1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF16UF1: [[PRED_STORE_IF3]]:
; VF16UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[STEP]]
; VF16UF1-NEXT: [[TMP16:%.*]] = add i64 0, [[TMP15]]
; VF16UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
; VF16UF1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP18]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; VF16UF1: [[PRED_STORE_CONTINUE4]]:
; VF16UF1-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP3]], i32 3
; VF16UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF16UF1: [[PRED_STORE_IF5]]:
; VF16UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[STEP]]
; VF16UF1-NEXT: [[TMP21:%.*]] = add i64 0, [[TMP20]]
; VF16UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
; VF16UF1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP23]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; VF16UF1: [[PRED_STORE_CONTINUE6]]:
; VF16UF1-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP3]], i32 4
; VF16UF1-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF16UF1: [[PRED_STORE_IF7]]:
; VF16UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[STEP]]
; VF16UF1-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]]
; VF16UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
; VF16UF1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP28]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; VF16UF1: [[PRED_STORE_CONTINUE8]]:
; VF16UF1-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP3]], i32 5
; VF16UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF16UF1: [[PRED_STORE_IF9]]:
; VF16UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[STEP]]
; VF16UF1-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]]
; VF16UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
; VF16UF1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP33]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; VF16UF1: [[PRED_STORE_CONTINUE10]]:
; VF16UF1-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP3]], i32 6
; VF16UF1-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF16UF1: [[PRED_STORE_IF11]]:
; VF16UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[STEP]]
; VF16UF1-NEXT: [[TMP36:%.*]] = add i64 0, [[TMP35]]
; VF16UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
; VF16UF1-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP38]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; VF16UF1: [[PRED_STORE_CONTINUE12]]:
; VF16UF1-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP3]], i32 7
; VF16UF1-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF16UF1: [[PRED_STORE_IF13]]:
; VF16UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[STEP]]
; VF16UF1-NEXT: [[TMP41:%.*]] = add i64 0, [[TMP40]]
; VF16UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]]
; VF16UF1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP43]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
; VF16UF1: [[PRED_STORE_CONTINUE14]]:
; VF16UF1-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP3]], i32 8
; VF16UF1-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; VF16UF1: [[PRED_STORE_IF15]]:
; VF16UF1-NEXT: [[TMP45:%.*]] = mul i64 8, [[STEP]]
; VF16UF1-NEXT: [[TMP46:%.*]] = add i64 0, [[TMP45]]
; VF16UF1-NEXT: [[TMP47:%.*]] = add i64 [[TMP46]], [[STEP]]
; VF16UF1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP47]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP48]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE16]]
; VF16UF1: [[PRED_STORE_CONTINUE16]]:
; VF16UF1-NEXT: [[TMP49:%.*]] = extractelement <16 x i1> [[TMP3]], i32 9
; VF16UF1-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; VF16UF1: [[PRED_STORE_IF17]]:
; VF16UF1-NEXT: [[TMP50:%.*]] = mul i64 9, [[STEP]]
; VF16UF1-NEXT: [[TMP51:%.*]] = add i64 0, [[TMP50]]
; VF16UF1-NEXT: [[TMP52:%.*]] = add i64 [[TMP51]], [[STEP]]
; VF16UF1-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP52]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP53]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE18]]
; VF16UF1: [[PRED_STORE_CONTINUE18]]:
; VF16UF1-NEXT: [[TMP54:%.*]] = extractelement <16 x i1> [[TMP3]], i32 10
; VF16UF1-NEXT: br i1 [[TMP54]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; VF16UF1: [[PRED_STORE_IF19]]:
; VF16UF1-NEXT: [[TMP55:%.*]] = mul i64 10, [[STEP]]
; VF16UF1-NEXT: [[TMP56:%.*]] = add i64 0, [[TMP55]]
; VF16UF1-NEXT: [[TMP57:%.*]] = add i64 [[TMP56]], [[STEP]]
; VF16UF1-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP57]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP58]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE20]]
; VF16UF1: [[PRED_STORE_CONTINUE20]]:
; VF16UF1-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP3]], i32 11
; VF16UF1-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; VF16UF1: [[PRED_STORE_IF21]]:
; VF16UF1-NEXT: [[TMP60:%.*]] = mul i64 11, [[STEP]]
; VF16UF1-NEXT: [[TMP61:%.*]] = add i64 0, [[TMP60]]
; VF16UF1-NEXT: [[TMP62:%.*]] = add i64 [[TMP61]], [[STEP]]
; VF16UF1-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP62]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP63]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE22]]
; VF16UF1: [[PRED_STORE_CONTINUE22]]:
; VF16UF1-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP3]], i32 12
; VF16UF1-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; VF16UF1: [[PRED_STORE_IF23]]:
; VF16UF1-NEXT: [[TMP65:%.*]] = mul i64 12, [[STEP]]
; VF16UF1-NEXT: [[TMP66:%.*]] = add i64 0, [[TMP65]]
; VF16UF1-NEXT: [[TMP67:%.*]] = add i64 [[TMP66]], [[STEP]]
; VF16UF1-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP67]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP68]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE24]]
; VF16UF1: [[PRED_STORE_CONTINUE24]]:
; VF16UF1-NEXT: [[TMP69:%.*]] = extractelement <16 x i1> [[TMP3]], i32 13
; VF16UF1-NEXT: br i1 [[TMP69]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; VF16UF1: [[PRED_STORE_IF25]]:
; VF16UF1-NEXT: [[TMP70:%.*]] = mul i64 13, [[STEP]]
; VF16UF1-NEXT: [[TMP71:%.*]] = add i64 0, [[TMP70]]
; VF16UF1-NEXT: [[TMP72:%.*]] = add i64 [[TMP71]], [[STEP]]
; VF16UF1-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP72]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP73]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE26]]
; VF16UF1: [[PRED_STORE_CONTINUE26]]:
; VF16UF1-NEXT: [[TMP74:%.*]] = extractelement <16 x i1> [[TMP3]], i32 14
; VF16UF1-NEXT: br i1 [[TMP74]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; VF16UF1: [[PRED_STORE_IF27]]:
; VF16UF1-NEXT: [[TMP75:%.*]] = mul i64 14, [[STEP]]
; VF16UF1-NEXT: [[TMP76:%.*]] = add i64 0, [[TMP75]]
; VF16UF1-NEXT: [[TMP77:%.*]] = add i64 [[TMP76]], [[STEP]]
; VF16UF1-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP77]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP78]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE28]]
; VF16UF1: [[PRED_STORE_CONTINUE28]]:
; VF16UF1-NEXT: [[TMP79:%.*]] = extractelement <16 x i1> [[TMP3]], i32 15
; VF16UF1-NEXT: br i1 [[TMP79]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; VF16UF1: [[PRED_STORE_IF29]]:
; VF16UF1-NEXT: [[TMP80:%.*]] = mul i64 15, [[STEP]]
; VF16UF1-NEXT: [[TMP81:%.*]] = add i64 0, [[TMP80]]
; VF16UF1-NEXT: [[TMP82:%.*]] = add i64 [[TMP81]], [[STEP]]
; VF16UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP82]]
; VF16UF1-NEXT: store i8 0, ptr [[GEP_DST]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE30]]
; VF16UF1: [[PRED_STORE_CONTINUE30]]:
; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: br label %[[EXIT:.*]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: ret void
;
entry:
; Assert %x == 65536 through llvm.assume, then derive the loop step from a
; frozen copy of %x (not from %x itself): %step = %fr - 65534.
%c = icmp eq i64 %x, 65536
call void @llvm.assume(i1 %c)
%fr = freeze i64 %x
%step = add i64 %fr, -65534
br label %loop
loop:
; %iv starts at 0 and is bumped by the loop-invariant %step. The incremented
; value %iv.next is used both as the byte offset of the store into %dst and in
; the signed exit comparison against 16.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%iv.next = add i64 %iv, %step
%gep.dst = getelementptr i8, ptr %dst, i64 %iv.next
store i8 0, ptr %gep.dst, align 1
%ec = icmp slt i64 %iv.next, 16
br i1 %ec, label %loop, label %exit
exit:
ret void
}
define void @test_vector_tc_eq_16(ptr %A) {
; VF8UF1-LABEL: define void @test_vector_tc_eq_16(
; VF8UF1-SAME: ptr [[A:%.*]]) {
; VF8UF1-NEXT: [[ENTRY:.*:]]
; VF8UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF8UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
; VF8UF1-NEXT: [[TMP1:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
; VF8UF1-NEXT: store <8 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VF8UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; VF8UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: br label %[[SCALAR_PH:.*]]
; VF8UF1: [[SCALAR_PH]]:
; VF8UF1-NEXT: br label %[[LOOP:.*]]
; VF8UF1: [[LOOP]]:
; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF8UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[TMP0]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
; VF8UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF8UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
; VF8UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: ret void
;
; VF8UF2-LABEL: define void @test_vector_tc_eq_16(
; VF8UF2-SAME: ptr [[A:%.*]]) {
; VF8UF2-NEXT: [[ENTRY:.*:]]
; VF8UF2-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[A]], align 1
; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP1]], align 1
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: br label %[[SCALAR_PH:.*]]
; VF8UF2: [[SCALAR_PH]]:
; VF8UF2-NEXT: br label %[[LOOP:.*]]
; VF8UF2: [[LOOP]]:
; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF8UF2-NEXT: [[P_SRC:%.*]] = phi ptr [ [[TMP0]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
; VF8UF2-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF8UF2-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
; VF8UF2-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: ret void
;
; VF16UF1-LABEL: define void @test_vector_tc_eq_16(
; VF16UF1-SAME: ptr [[A:%.*]]) {
; VF16UF1-NEXT: [[ENTRY:.*:]]
; VF16UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[A]], align 1
; VF16UF1-NEXT: [[TMP1:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10)
; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[A]], align 1
; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: br label %[[SCALAR_PH:.*]]
; VF16UF1: [[SCALAR_PH]]:
; VF16UF1-NEXT: br label %[[LOOP:.*]]
; VF16UF1: [[LOOP]]:
; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VF16UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[TMP0]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
; VF16UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF16UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
; VF16UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
%p.src.next = getelementptr inbounds i8, ptr %p.src, i64 1
%l = load i8, ptr %p.src, align 1
%add = add nsw i8 %l, 10
store i8 %add, ptr %p.src
%iv.next = add nsw i64 %iv, 1
%cmp = icmp eq i64 %iv.next, 17
br i1 %cmp, label %exit, label %loop
exit:
ret void
}
;.
; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; VF8UF1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; VF8UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; VF8UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; VF8UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; VF8UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; VF8UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
;.
; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF8UF2: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
; VF8UF2: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
;.
; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF16UF1: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
; VF16UF1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
;.