Philip Reames c416f6700f [IVDescriptors] Add pointer InductionDescriptors with non-constant strides (try 2)
(JFYI - This has been heavily reframed since original attempt at landing.)

This change updates the InductionDescriptor logic to allow matching a pointer IV with a non-constant stride, but also updates the LoopVectorizer to bailout on such descriptors by default. This preserves the default vectorizer behavior.

In review, it was pointed out that there's multiple unfortunate performance implications which need to be addressed before this can be enabled. Having a flag allows us to exercise the behavior, and write test cases for logic which is otherwise unreachable (or hard to reach).

This will also enable non-constant stride pointer recurrences for other consumers. I've audited said code, and don't see any obvious issues.

Differential Revision: https://reviews.llvm.org/D147336
2023-04-05 09:32:35 -07:00

295 lines
17 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck --check-prefixes=CHECK,DEFAULT %s
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -lv-strided-pointer-ivs=true -S | FileCheck --check-prefixes=CHECK,STRIDED %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nofree norecurse nounwind
define void @a(ptr readnone %b) {
; CHECK-LABEL: @a(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B1:%.*]] = ptrtoint ptr [[B:%.*]] to i64
; CHECK-NEXT: [[CMP_NOT4:%.*]] = icmp eq ptr [[B]], null
; CHECK-NEXT: br i1 [[CMP_NOT4]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[B1]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], -1
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], -1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: store i8 95, ptr [[TMP10]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], -1
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP13]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1
; CHECK-NEXT: store i8 95, ptr [[TMP14]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], -1
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1
; CHECK-NEXT: store i8 95, ptr [[TMP18]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.if9:
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], -1
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP21]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP4]], i64 -1
; CHECK-NEXT: store i8 95, ptr [[TMP22]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.continue10:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[C_05:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[C_05]], i64 -1
; CHECK-NEXT: [[TMP24:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP24]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: store i8 95, ptr [[INCDEC_PTR]], align 1
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[B]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
;
entry:
%cmp.not4 = icmp eq ptr %b, null
br i1 %cmp.not4, label %for.cond.cleanup, label %for.body.preheader
for.body.preheader: ; preds = %entry
br label %for.body
for.cond.cleanup.loopexit: ; preds = %if.end
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void
for.body: ; preds = %for.body.preheader, %if.end
%c.05 = phi ptr [ %incdec.ptr, %if.end ], [ null, %for.body.preheader ]
%incdec.ptr = getelementptr inbounds i8, ptr %c.05, i64 -1
%0 = load i8, ptr %incdec.ptr, align 1
%tobool.not = icmp eq i8 %0, 0
br i1 %tobool.not, label %if.end, label %if.then
if.then: ; preds = %for.body
store i8 95, ptr %incdec.ptr, align 1
br label %if.end
if.end: ; preds = %for.body, %if.then
%cmp.not = icmp eq ptr %incdec.ptr, %b
br i1 %cmp.not, label %for.cond.cleanup.loopexit, label %for.body
}
; In the test below the pointer phi %ptr.iv.2 is used as
; 1. As a uniform address for the load, and
; 2. Non-uniform use by the getelementptr which is stored. This requires the
; vector value.
define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias %start.2, i64 %N) {
; CHECK-LABEL: @pointer_induction_used_as_vector(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[N_VEC]], 8
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[START_2:%.*]], i64 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP3]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: store <4 x ptr> [[TMP4]], ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT: store <4 x i8> [[TMP8]], ptr [[TMP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[START_2]], [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
; CHECK: loop.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_BODY]] ]
; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_1_NEXT:%.*]], [[LOOP_BODY]] ]
; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], [[LOOP_BODY]] ]
; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr inbounds ptr, ptr [[PTR_IV_1]], i64 1
; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV_2]], i64 1
; CHECK-NEXT: store ptr [[PTR_IV_2_NEXT]], ptr [[PTR_IV_1]], align 8
; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[PTR_IV_2]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LV]], 1
; CHECK-NEXT: store i8 [[ADD]], ptr [[PTR_IV_2]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[C]], label [[LOOP_BODY]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop.body
loop.body: ; preds = %loop.body, %entry
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
%ptr.iv.1 = phi ptr [ %start.1, %entry ], [ %ptr.iv.1.next, %loop.body ]
%ptr.iv.2 = phi ptr [ %start.2, %entry ], [ %ptr.iv.2.next, %loop.body ]
%ptr.iv.1.next = getelementptr inbounds ptr, ptr %ptr.iv.1, i64 1
%ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1
store ptr %ptr.iv.2.next, ptr %ptr.iv.1, align 8
%lv = load i8, ptr %ptr.iv.2, align 1
%add = add i8 %lv, 1
store i8 %add, ptr %ptr.iv.2, align 1
%iv.next = add nuw i64 %iv, 1
%c = icmp ne i64 %iv.next, %N
br i1 %c, label %loop.body, label %exit
exit: ; preds = %loop.body
ret void
}
; Test the vector expansion of a non-constant stride pointer IV
define void @non_constant_vector_expansion(i32 %0, ptr %call) {
; DEFAULT-LABEL: @non_constant_vector_expansion(
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1
; DEFAULT-NEXT: br label [[FOR_COND:%.*]]
; DEFAULT: for.cond:
; DEFAULT-NEXT: [[TMP1:%.*]] = phi i32 [ 30, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; DEFAULT-NEXT: [[P_0:%.*]] = phi ptr [ null, [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_COND]] ]
; DEFAULT-NEXT: [[ADD_PTR]] = getelementptr i8, ptr [[P_0]], i32 [[MUL]]
; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP1]]
; DEFAULT-NEXT: store ptr [[P_0]], ptr [[ARRAYIDX]], align 4
; DEFAULT-NEXT: [[INC]] = add i32 [[TMP1]], 1
; DEFAULT-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
; DEFAULT-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_END:%.*]], label [[FOR_COND]]
; DEFAULT: for.end:
; DEFAULT-NEXT: ret void
;
; STRIDED-LABEL: @non_constant_vector_expansion(
; STRIDED-NEXT: entry:
; STRIDED-NEXT: [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1
; STRIDED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; STRIDED: vector.scevcheck:
; STRIDED-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; STRIDED: vector.ph:
; STRIDED-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
; STRIDED-NEXT: [[TMP2:%.*]] = mul i64 4294967264, [[TMP1]]
; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]]
; STRIDED-NEXT: [[TMP3:%.*]] = sext i32 [[MUL]] to i64
; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; STRIDED: vector.body:
; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0
; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]]
; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[VECTOR_GEP]]
; STRIDED-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = add i32 30, [[DOTCAST]]
; STRIDED-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 0
; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP6]]
; STRIDED-NEXT: [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP7]], i32 0
; STRIDED-NEXT: store <4 x ptr> [[TMP5]], ptr [[TMP8]], align 4
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP4]]
; STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967264
; STRIDED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; STRIDED: middle.block:
; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 4294967267, 4294967264
; STRIDED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; STRIDED: scalar.ph:
; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -2, [[MIDDLE_BLOCK]] ], [ 30, [[ENTRY:%.*]] ], [ 30, [[VECTOR_SCEVCHECK]] ]
; STRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[ENTRY]] ], [ null, [[VECTOR_SCEVCHECK]] ]
; STRIDED-NEXT: br label [[FOR_COND:%.*]]
; STRIDED: for.cond:
; STRIDED-NEXT: [[TMP10:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; STRIDED-NEXT: [[P_0:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_COND]] ]
; STRIDED-NEXT: [[ADD_PTR]] = getelementptr i8, ptr [[P_0]], i32 [[MUL]]
; STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP10]]
; STRIDED-NEXT: store ptr [[P_0]], ptr [[ARRAYIDX]], align 4
; STRIDED-NEXT: [[INC]] = add i32 [[TMP10]], 1
; STRIDED-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP10]], 0
; STRIDED-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
; STRIDED: for.end:
; STRIDED-NEXT: ret void
;
entry:
%mul = shl i32 %0, 1
br label %for.cond
for.cond: ; preds = %for.body, %entry
%1 = phi i32 [ 30, %entry ], [ %inc, %for.cond ]
%p.0 = phi ptr [ null, %entry ], [ %add.ptr, %for.cond ]
%add.ptr = getelementptr i8, ptr %p.0, i32 %mul
%arrayidx = getelementptr ptr, ptr %call, i32 %1
store ptr %p.0, ptr %arrayidx, align 4
%inc = add i32 %1, 1
%tobool.not = icmp eq i32 %1, 0
br i1 %tobool.not, label %for.end, label %for.cond
for.end: ; preds = %for.cond
ret void
}