; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; This test verifies that the loop vectorizer does not vectorize low trip count
; loops that require runtime checks (the trip count is computed from profile info).
; REQUIRES: asserts
; RUN: opt < %s -passes=loop-vectorize -loop-vectorize-with-block-frequency -S | FileCheck %s

target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"

@tab = common global [32 x i8] zeroinitializer, align 1

;.
; CHECK: @tab = common global [32 x i8] zeroinitializer, align 1
;.
define i32 @foo_low_trip_count1(i32 %bound) {
; Simple loop with a low trip count. Should not be vectorized.
; CHECK-LABEL: define i32 @foo_low_trip_count1(
; CHECK-SAME: i32 [[BOUND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], [[BOUND]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !prof [[PROF0:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., ptr %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end: ; preds = %for.body
  ret i32 0
}

define i32 @foo_low_trip_count2(i32 %bound) !prof !0 {
; The loop has the same invocation count as the function, but has a low
; trip count per invocation and is not worth vectorizing.
; CHECK-LABEL: define i32 @foo_low_trip_count2(
; CHECK-SAME: i32 [[BOUND:%.*]]) !prof [[PROF1:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], [[BOUND]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !prof [[PROF0]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., ptr %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end: ; preds = %for.body
  ret i32 0
}

define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {
; The loop has a low invocation count compared to the function invocation count,
; but has a high trip count per invocation. Vectorize it.
; The original loop has latchExitWeight=10 and backedgeTakenWeight=10,000,
; therefore estimatedBackedgeTakenCount=1,000 and estimatedTripCount=1,001.
; Vectorizing by 4 produces estimatedTripCounts of 1,001/4=250 and 1,001%4=1
; for the vectorized and remainder loops, respectively; therefore their
; estimatedBackedgeTakenCounts are 249 and 0, and so the weights recorded with
; loop invocation weights of 10 are {10, 2490} and {10, 0}. This
; explains the values for PROF4 and PROF10.
; CHECK-LABEL: define i32 @foo_low_trip_count3(
; CHECK-SAME: i1 [[COND:%.*]], i32 [[BOUND:%.*]]) !prof [[PROF1]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[COND]], label %[[FOR_PREHEADER:.*]], label %[[FOR_END:.*]], !prof [[PROF2:![0-9]+]]
; CHECK: [[FOR_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF3:![0-9]+]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1)
; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[TMP1]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF4:![0-9]+]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]], !prof [[PROF9:![0-9]+]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_PREHEADER]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP5]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], [[BOUND]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT: br label %[[FOR_END]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
  br i1 %cond, label %for.preheader, label %for.end, !prof !2

for.preheader:
  br label %for.body

for.body: ; preds = %for.body, %for.preheader
  %i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., ptr %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !3

for.end: ; preds = %for.body, %entry
  ret i32 0
}

define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) {
; Simple loop with a low trip count and an inequality test for the exit.
; Should not be vectorized.
; CHECK-LABEL: define i32 @foo_low_trip_count_icmp_sgt(
; CHECK-SAME: i32 [[BOUND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i32 [[I_08]], [[BOUND]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !prof [[PROF0]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., ptr %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp sgt i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end: ; preds = %for.body
  ret i32 0
}

define i32 @const_low_trip_count() {
; Simple loop with a constant, small trip count and no profiling info.
; CHECK-LABEL: define i32 @const_low_trip_count() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 2
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., ptr %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp slt i32 %i.08, 2
  br i1 %exitcond, label %for.body, label %for.end

for.end: ; preds = %for.body
  ret i32 0
}

define i32 @const_large_trip_count() {
; Simple loop with a constant, large trip count and no profiling info.
; CHECK-LABEL: define i32 @const_large_trip_count() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1)
; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP0]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 1000, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP4]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 1000
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., ptr %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp slt i32 %i.08, 1000
  br i1 %exitcond, label %for.body, label %for.end

for.end: ; preds = %for.body
  ret i32 0
}

define i32 @const_small_trip_count_step() {
; Simple loop with a static, small trip count (induction step of 5) and no
; profiling info.
; CHECK-LABEL: define i32 @const_small_trip_count_step() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 5
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 10
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., ptr %arrayidx, align 1
  %inc = add nsw i32 %i.08, 5
  %exitcond = icmp slt i32 %i.08, 10
  br i1 %exitcond, label %for.body, label %for.end

for.end: ; preds = %for.body
  ret i32 0
}

define i32 @const_trip_over_profile() !prof !0 {
; A constant trip count takes precedence over profile data.
; CHECK-LABEL: define i32 @const_trip_over_profile(
; CHECK-SAME: ) !prof [[PROF1]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1)
; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP0]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF15:![0-9]+]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 1000, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP4]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 1000
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !prof [[PROF17:![0-9]+]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., ptr %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp slt i32 %i.08, 1000
  br i1 %exitcond, label %for.body, label %for.end, !prof !1

for.end: ; preds = %for.body
  ret i32 0
}

; Profile metadata shared by the functions above.
; Note that !1 is attached to branches with different successor orders: the
; foo_low_trip_count* loops branch to (%for.end, %for.body), whereas
; const_trip_over_profile branches to (%for.body, %for.end).
!0 = !{!"function_entry_count", i64 100}
!1 = !{!"branch_weights", i32 100, i32 0}
!2 = !{!"branch_weights", i32 10, i32 90}
!3 = !{!"branch_weights", i32 10, i32 10000}
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 100, i32 0}
; CHECK: [[PROF1]] = !{!"function_entry_count", i64 100}
; CHECK: [[PROF2]] = !{!"branch_weights", i32 10, i32 90}
; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 127}
; CHECK: [[PROF4]] = !{!"branch_weights", i32 10, i32 2490}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]], [[META8:![0-9]+]]}
; CHECK: [[META6]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 250}
; CHECK: [[PROF9]] = !{!"branch_weights", i32 1, i32 3}
; CHECK: [[PROF10]] = !{!"branch_weights", i32 10, i32 0}
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META6]], [[META12:![0-9]+]]}
; CHECK: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 1}
; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META6]], [[META7]]}
; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META7]], [[META6]]}
; CHECK: [[PROF15]] = !{!"branch_weights", i32 1, i32 249}
; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META6]], [[META7]], [[META8]]}
; CHECK: [[PROF17]] = !{!"branch_weights", i32 0, i32 1}
; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META7]], [[META6]], [[META12]]}
;.