
I've changed the constructors of the EpilogueVectorizerEpilogueLoop and EpilogueVectorizerMainLoop classes so that they pass an additional boolean parameter to the parent class, indicating whether the main loop or the epilogue loop is being vectorised. The InnerLoopAndEpilogueVectorizer class uses this new argument, in combination with the EpilogueLoopVectorizationInfo struct, to set the right VF and UF values. This in turn allows EpilogueVectorizerEpilogueLoop to access the correct VF and UF values for the main loop, which are required when setting branch weights in the minimum iteration check block.
148 lines
9.1 KiB
LLVM
148 lines
9.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br " --filter "^.*:" --filter "icmp" --version 5
|
|
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-epilogue-vectorization \
|
|
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC1_EPI4
|
|
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-epilogue-vectorization \
|
|
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC2_EPI4
|
|
|
|
; @f0 guards a single-block counted loop with a signed `%len > 0` check.
; The loop carries two induction variables that both start at 0 and step by 1:
; an i8 counter (%i8), used as the GEP index into %p, and an i32 counter
; (%i32), which is the value stored and the one compared against %len.
; The %n parameter is not referenced anywhere in the body.
; Profile data is attached to the function (!0) and to both conditional
; branches (!1 on the entry guard, !2 on the loop back-edge).
define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
|
|
; MAINVF4IC1_EPI4-LABEL: define void @f0(
|
|
; MAINVF4IC1_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
|
|
; MAINVF4IC1_EPI4: [[ENTRY:.*:]]
|
|
; MAINVF4IC1_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[ITER_CHECK]]:
|
|
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
|
|
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_SCEVCHECK]]:
|
|
; MAINVF4IC1_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
|
|
; MAINVF4IC1_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
|
|
; MAINVF4IC1_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
|
|
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 4
|
|
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_PH]]:
|
|
; MAINVF4IC1_EPI4: br label %[[VECTOR_BODY:.*]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_BODY]]:
|
|
; MAINVF4IC1_EPI4: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
|
|
; MAINVF4IC1_EPI4: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[MIDDLE_BLOCK]]:
|
|
; MAINVF4IC1_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
|
|
; MAINVF4IC1_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
|
|
; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_PH]]:
|
|
; MAINVF4IC1_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
|
|
; MAINVF4IC1_EPI4: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
|
|
; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
|
|
; MAINVF4IC1_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
|
|
; MAINVF4IC1_EPI4: br label %[[LOOP:.*]]
|
|
; MAINVF4IC1_EPI4: [[LOOP]]:
|
|
; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]:
|
|
; MAINVF4IC1_EPI4: br label %[[EXIT]]
|
|
; MAINVF4IC1_EPI4: [[EXIT]]:
|
|
;
|
|
; MAINVF4IC2_EPI4-LABEL: define void @f0(
|
|
; MAINVF4IC2_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
|
|
; MAINVF4IC2_EPI4: [[ENTRY:.*:]]
|
|
; MAINVF4IC2_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[ITER_CHECK]]:
|
|
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
|
|
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_SCEVCHECK]]:
|
|
; MAINVF4IC2_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
|
|
; MAINVF4IC2_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
|
|
; MAINVF4IC2_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
|
|
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8
|
|
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_PH]]:
|
|
; MAINVF4IC2_EPI4: br label %[[VECTOR_BODY:.*]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_BODY]]:
|
|
; MAINVF4IC2_EPI4: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
|
|
; MAINVF4IC2_EPI4: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[MIDDLE_BLOCK]]:
|
|
; MAINVF4IC2_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
|
|
; MAINVF4IC2_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
|
|
; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_PH]]:
|
|
; MAINVF4IC2_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
|
|
; MAINVF4IC2_EPI4: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
|
|
; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
|
|
; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
|
|
; MAINVF4IC2_EPI4: br label %[[LOOP:.*]]
|
|
; MAINVF4IC2_EPI4: [[LOOP]]:
|
|
; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]:
|
|
; MAINVF4IC2_EPI4: br label %[[EXIT]]
|
|
; MAINVF4IC2_EPI4: [[EXIT]]:
|
|
;
|
|
; Entry guard: the loop is skipped entirely unless %len is positive (signed).
entry:
|
|
%cmp.entry = icmp sgt i32 %len, 0
|
|
br i1 %cmp.entry, label %loop, label %exit, !prof !1
|
|
|
|
; Header and latch are the same block; both phis are 0 when coming from %entry.
loop:
|
|
%i8 = phi i8 [0, %entry], [%i8.inc, %loop]
|
|
%i32 = phi i32 [0, %entry], [%i32.inc, %loop]
|
|
|
|
; The store address is indexed by the 8-bit counter, not the 32-bit one.
; NOTE(review): this narrow index is presumably what produces the SCEV runtime
; check block in the expected output above; confirm against the pass.
%ptr = getelementptr inbounds i32, ptr %p, i8 %i8
|
|
store i32 %i32, ptr %ptr
|
|
|
|
%i8.inc = add i8 %i8, 1
|
|
%i32.inc = add i32 %i32, 1
|
|
|
|
; The exit test compares the pre-increment %i32 (unsigned) against %len.
%cmp.loop = icmp ult i32 %i32, %len
|
|
br i1 %cmp.loop, label %loop, label %exit, !prof !2
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Profile metadata attached to @f0.
; !0: function entry count (13), attached to the function definition.
!0 = !{!"function_entry_count", i64 13}
|
|
; !1: weights for the entry guard branch; first weight is the %loop successor,
; second is %exit.
!1 = !{!"branch_weights", i32 12, i32 1}
|
|
; !2: weights for the loop branch; first weight is the back-edge to %loop,
; second is the exit edge.
!2 = !{!"branch_weights", i32 1234, i32 1}
|
|
;.
|
|
; MAINVF4IC1_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
|
|
; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
|
|
; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
|
|
; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 307}
|
|
; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
|
|
; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; MAINVF4IC1_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3}
|
|
; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0}
|
|
; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
|
|
; MAINVF4IC1_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
|
|
; MAINVF4IC1_EPI4: [[PROF11]] = !{!"branch_weights", i32 2, i32 1}
|
|
; MAINVF4IC1_EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]}
|
|
;.
|
|
; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
|
|
; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
|
|
; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
|
|
; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153}
|
|
; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
|
|
; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; MAINVF4IC2_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 7}
|
|
; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 4}
|
|
; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
|
|
; MAINVF4IC2_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
|
|
; MAINVF4IC2_EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 3}
|
|
; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1}
|
|
; MAINVF4IC2_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]}
|
|
;.
|