Move creation of the minimum iteration check for the epilogue vector loop to VPlan. This is a first step towards breaking up and moving skeleton creation for epilogue vectorization to VPlan. It moves most logic out of EpilogueVectorizerEpilogueLoop: the minimum iteration check is created directly in VPlan, connecting the check blocks from the main vector loop is done as post-processing. Next steps are to move connecting and updating the branches from the check blocks to VPlan, as well as updating the incoming values for phis. Test changes are improvements due to folding of live-ins. PR: https://github.com/llvm/llvm-project/pull/157545
101 lines
5.5 KiB
LLVM
101 lines
5.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: opt -p loop-vectorize -force-vector-width=4 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 -S %s | FileCheck %s
|
|
|
|
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
|
|
|
define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) {
|
|
; CHECK-LABEL: define void @trunc_iv_steps_with_epilogue(
|
|
; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
|
|
; CHECK-NEXT: iter.check:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
|
|
; CHECK: vector.scevcheck:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N]] to i32
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
|
|
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[N]], 4294967295
|
|
; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
|
|
; CHECK-NEXT: br i1 [[TMP4]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
|
|
; CHECK: vector.main.loop.iter.check:
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP5]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
|
|
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[WIDE_LOAD]], splat (i8 2)
|
|
; CHECK-NEXT: store <4 x i8> [[TMP9]], ptr [[TMP7]], align 1
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
|
|
; CHECK: vec.epilog.iter.check:
|
|
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
|
|
; CHECK: vec.epilog.ph:
|
|
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
|
|
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
|
|
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
|
|
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
|
|
; CHECK: vec.epilog.vector.body:
|
|
; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[INDEX5]] to i32
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP11]]
|
|
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
|
|
; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i8> [[WIDE_LOAD6]], splat (i8 2)
|
|
; CHECK-NEXT: store <4 x i8> [[TMP15]], ptr [[TMP13]], align 1
|
|
; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 4
|
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]]
|
|
; CHECK-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; CHECK: vec.epilog.middle.block:
|
|
; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
|
|
; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
|
|
; CHECK: vec.epilog.scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[IV_I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[T_IV:%.*]] = trunc i64 [[IV_I]] to i32
|
|
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[T_IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP]], align 1
|
|
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[L]], 2
|
|
; CHECK-NEXT: store i8 [[ADD]], ptr [[GEP]], align 1
|
|
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV_I]], 1
|
|
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_I]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv.i = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%t.iv = trunc i64 %iv.i to i32
|
|
%gep = getelementptr i8, ptr %A, i32 %t.iv
|
|
%l = load i8, ptr %gep, align 1
|
|
%add = add i8 %l, 2
|
|
store i8 %add, ptr %gep
|
|
%iv.next = add i64 %iv.i, 1
|
|
%ec = icmp eq i64 %iv.i, %N
|
|
br i1 %ec, label %exit, label %loop
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; CHECK: [[PROF3]] = !{!"branch_weights", i32 4, i32 0}
|
|
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
|
|
;.
|