David Sherwood 9181a7e294
[LV] Fix branch weights in epilogue min iteration check block (#152534)
I've changed how we construct the EpilogueVectorizerEpilogueLoop and
EpilogueVectorizerMainLoop classes so that we construct the parent class
with an additional boolean parameter indicating whether we're
vectorising the main or epilogue loop. The
InnerLoopAndEpilogueVectorizer class uses this new argument in
combination with the EpilogueLoopVectorizationInfo struct to set the
right UF and VF values. This then allows EpilogueVectorizerEpilogueLoop
to access the correct values of VF and UF for the main loop, which are
required when setting branch weights in the minimum iteration check
block.
2025-08-11 09:52:54 +01:00

148 lines
9.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br " --filter "^.*:" --filter "icmp" --version 5
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-epilogue-vectorization \
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC1_EPI4
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-epilogue-vectorization \
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC2_EPI4
; @f0 - scalar loop with profile data, used as input to the loop vectorizer.
;
; The loop keeps two counters that both start at 0 and step by 1: an i8 one
; (%i8) that indexes %p, and an i32 one (%i32) that is both the stored value
; and the operand of the exit compare. %n is not referenced in the body.
;
; Both opt invocations at the top of the file force a main vector width of 4
; and an epilogue width of 4; they differ only in the forced interleave count
; (1 vs 2). The assertion lines that follow the define are autogenerated (see
; the NOTE at the top of the test) - regenerate them with
; utils/update_test_checks.py instead of editing them by hand.
define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
; MAINVF4IC1_EPI4-LABEL: define void @f0(
; MAINVF4IC1_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
; MAINVF4IC1_EPI4: [[ENTRY:.*:]]
; MAINVF4IC1_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
; MAINVF4IC1_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
; MAINVF4IC1_EPI4: [[ITER_CHECK]]:
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
; MAINVF4IC1_EPI4: [[VECTOR_SCEVCHECK]]:
; MAINVF4IC1_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
; MAINVF4IC1_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
; MAINVF4IC1_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
; MAINVF4IC1_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 4
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
; MAINVF4IC1_EPI4: [[VECTOR_PH]]:
; MAINVF4IC1_EPI4: br label %[[VECTOR_BODY:.*]]
; MAINVF4IC1_EPI4: [[VECTOR_BODY]]:
; MAINVF4IC1_EPI4: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
; MAINVF4IC1_EPI4: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
; MAINVF4IC1_EPI4: [[MIDDLE_BLOCK]]:
; MAINVF4IC1_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
; MAINVF4IC1_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
; MAINVF4IC1_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
; MAINVF4IC1_EPI4: [[VEC_EPILOG_PH]]:
; MAINVF4IC1_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; MAINVF4IC1_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
; MAINVF4IC1_EPI4: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
; MAINVF4IC1_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; MAINVF4IC1_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]]
; MAINVF4IC1_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
; MAINVF4IC1_EPI4: br label %[[LOOP:.*]]
; MAINVF4IC1_EPI4: [[LOOP]]:
; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]]
; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]:
; MAINVF4IC1_EPI4: br label %[[EXIT]]
; MAINVF4IC1_EPI4: [[EXIT]]:
;
; MAINVF4IC2_EPI4-LABEL: define void @f0(
; MAINVF4IC2_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
; MAINVF4IC2_EPI4: [[ENTRY:.*:]]
; MAINVF4IC2_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
; MAINVF4IC2_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
; MAINVF4IC2_EPI4: [[ITER_CHECK]]:
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
; MAINVF4IC2_EPI4: [[VECTOR_SCEVCHECK]]:
; MAINVF4IC2_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
; MAINVF4IC2_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
; MAINVF4IC2_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
; MAINVF4IC2_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
; MAINVF4IC2_EPI4: [[VECTOR_PH]]:
; MAINVF4IC2_EPI4: br label %[[VECTOR_BODY:.*]]
; MAINVF4IC2_EPI4: [[VECTOR_BODY]]:
; MAINVF4IC2_EPI4: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
; MAINVF4IC2_EPI4: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
; MAINVF4IC2_EPI4: [[MIDDLE_BLOCK]]:
; MAINVF4IC2_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
; MAINVF4IC2_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
; MAINVF4IC2_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
; MAINVF4IC2_EPI4: [[VEC_EPILOG_PH]]:
; MAINVF4IC2_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; MAINVF4IC2_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
; MAINVF4IC2_EPI4: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]]
; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
; MAINVF4IC2_EPI4: br label %[[LOOP:.*]]
; MAINVF4IC2_EPI4: [[LOOP]]:
; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]]
; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]:
; MAINVF4IC2_EPI4: br label %[[EXIT]]
; MAINVF4IC2_EPI4: [[EXIT]]:
;
entry:
; Guard: the loop is entered only when %len is positive as a signed value;
; otherwise control goes straight to %exit.
%cmp.entry = icmp sgt i32 %len, 0
br i1 %cmp.entry, label %loop, label %exit, !prof !1
loop:
; Narrow counter, used only as the element index into %p.
%i8 = phi i8 [0, %entry], [%i8.inc, %loop]
; Wide counter, used as the stored value and in the exit test.
%i32 = phi i32 [0, %entry], [%i32.inc, %loop]
; Store the current i32 counter into the i32 element of %p selected by %i8.
%ptr = getelementptr inbounds i32, ptr %p, i8 %i8
store i32 %i32, ptr %ptr
%i8.inc = add i8 %i8, 1
%i32.inc = add i32 %i32, 1
; The exit test reads the pre-increment %i32 (not %i32.inc) and compares it
; unsigned against %len; the back edge is taken while it is below %len.
%cmp.loop = icmp ult i32 %i32, %len
br i1 %cmp.loop, label %loop, label %exit, !prof !2
exit:
ret void
}
; Profile metadata referenced from @f0 through its !prof attachments.
; !0 is attached to the function definition itself.
!0 = !{!"function_entry_count", i64 13}
; !1 is attached to the guard branch in the entry block, whose successors are
; listed as %loop then %exit.
!1 = !{!"branch_weights", i32 12, i32 1}
; !2 is attached to the branch at the end of the loop block, whose successors
; are listed as %loop (back edge) then %exit.
!2 = !{!"branch_weights", i32 1234, i32 1}
;.
; MAINVF4IC1_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 307}
; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
; MAINVF4IC1_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3}
; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0}
; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
; MAINVF4IC1_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
; MAINVF4IC1_EPI4: [[PROF11]] = !{!"branch_weights", i32 2, i32 1}
; MAINVF4IC1_EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]}
;.
; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153}
; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
; MAINVF4IC2_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 7}
; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 4}
; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
; MAINVF4IC2_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
; MAINVF4IC2_EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 3}
; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1}
; MAINVF4IC2_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]}
;.