[LV] Handle vector trip count being zero in preparePlanForEpiVectorLoop.

After a485e0e, we may not set the vector trip count in
preparePlanForEpilogueVectorLoop if it is zero. We should not choose a
VF * UF that makes the main vector loop dead (i.e. vector trip count is
zero), but there are some cases where this can happen currently.

In those cases, set EPI.VectorTripCount to zero.
This commit is contained in:
Florian Hahn 2025-08-20 11:54:21 +01:00
parent 0989ff5de8
commit dc23869f98
No known key found for this signature in database
GPG Key ID: C8B0D7090F9127E6
2 changed files with 101 additions and 0 deletions

View File

@ -9796,6 +9796,19 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
"Must only have a single non-zero incoming value");
EPI.VectorTripCount = Inc;
}
// If we didn't find a non-zero vector trip count, all incoming values
// must be zero, which also means the vector trip count is zero. Pick the
// first zero as vector trip count.
// TODO: We should not choose VF * UF so the main vector loop is known to
// be dead.
if (!EPI.VectorTripCount) {
assert(
EPResumeVal->getNumIncomingValues() > 0 &&
all_of(EPResumeVal->incoming_values(),
[](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
"all incoming values must be 0");
EPI.VectorTripCount = EPResumeVal->getOperand(0);
}
VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
assert(all_of(IV->users(),
[](const VPUser *U) {

View File

@ -333,3 +333,91 @@ for.body:
exit:
ret void
}
; TODO: Choose smaller VF * UF for main loop, so we do not create a dead vector loop.
define void @small_trip_count_loop(ptr %arg, ptr %arg2) {
; CHECK-LABEL: @small_trip_count_loop(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[ARG3:%.*]] = ptrtoint ptr [[ARG:%.*]] to i64
; CHECK-NEXT: [[ARG21:%.*]] = ptrtoint ptr [[ARG2:%.*]] to i64
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[ARG21]], [[ARG3]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 16
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 32
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 48
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[ARG]], align 1
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], splat (i8 10)
; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[WIDE_LOAD4]], splat (i8 10)
; CHECK-NEXT: [[TMP6:%.*]] = add <16 x i8> [[WIDE_LOAD5]], splat (i8 10)
; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[WIDE_LOAD6]], splat (i8 10)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 16
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 32
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 48
; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[ARG2]], align 1
; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP8]], align 1
; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1
; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP10]], align 1
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
; CHECK-NEXT: [[TMP12:%.*]] = add <16 x i8> [[WIDE_LOAD7]], splat (i8 10)
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 [[INDEX]]
; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 [[IV]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[SELECT:%.*]] = add i8 [[LOAD]], 10
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 [[IV]]
; CHECK-NEXT: store i8 [[SELECT]], ptr [[GEP_B]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 20
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr inbounds i8, ptr %arg, i32 %iv
%load = load i8, ptr %gep.A, align 1
%select = add i8 %load, 10
%gep.B = getelementptr inbounds i8, ptr %arg2, i32 %iv
store i8 %select, ptr %gep.B, align 1
%iv.next = add i32 %iv, 1
%ec = icmp eq i32 %iv, 20
br i1 %ec, label %exit, label %loop
exit:
ret void
}