[LV]: Skip Epilogue scalable VF greater than RemainingIterations. (#156724)

Consider skipping epilogue scalable VF when they are greater than
RemainingIterations same as fixed VF.
And skip scalable RemainingIterations from that comparison because
SCEV ATM can't evaluate non-canonical vscale-based expressions.
This commit is contained in:
Hassnaa Hamdi 2025-11-19 05:11:17 +00:00 committed by GitHub
parent c942ebdb66
commit f7f41350b4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 63 additions and 153 deletions

View File

@ -4428,11 +4428,13 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
assert(!isa<SCEVCouldNotCompute>(TC) && "Trip count SCEV must be computable");
const SCEV *KnownMinTC;
bool ScalableTC = match(TC, m_scev_c_Mul(m_SCEV(KnownMinTC), m_SCEVVScale()));
bool ScalableRemIter = false;
// Use versions of TC and VF in which both are either scalable or fixed.
if (ScalableTC == MainLoopVF.isScalable())
if (ScalableTC == MainLoopVF.isScalable()) {
ScalableRemIter = ScalableTC;
RemainingIterations =
SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
else if (ScalableTC) {
} else if (ScalableTC) {
const SCEV *EstimatedTC = SE.getMulExpr(
KnownMinTC,
SE.getConstant(TCType, CM.getVScaleForTuning().value_or(1)));
@ -4456,6 +4458,9 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
<< MaxTripCount << "\n");
}
auto SkipVF = [&](const SCEV *VF, const SCEV *RemIter) -> bool {
return SE.isKnownPredicate(CmpInst::ICMP_UGT, VF, RemIter);
};
for (auto &NextVF : ProfitableVFs) {
// Skip candidate VFs without a corresponding VPlan.
if (!hasPlanWithVF(NextVF.Width))
@ -4473,11 +4478,17 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
// If NextVF is greater than the number of remaining iterations, the
// epilogue loop would be dead. Skip such factors.
if (RemainingIterations && !NextVF.Width.isScalable()) {
if (SE.isKnownPredicate(
CmpInst::ICMP_UGT,
SE.getConstant(TCType, NextVF.Width.getFixedValue()),
RemainingIterations))
// TODO: We should also consider comparing against a scalable
// RemainingIterations when SCEV be able to evaluate non-canonical
// vscale-based expressions.
if (!ScalableRemIter) {
// Handle the case where NextVF and RemainingIterations are in different
// numerical spaces.
ElementCount EC = NextVF.Width;
if (NextVF.Width.isScalable())
EC = ElementCount::getFixed(
estimateElementCount(NextVF.Width, CM.getVScaleForTuning()));
if (SkipVF(SE.getElementCount(TCType, EC), RemainingIterations))
continue;
}

View File

@ -9,13 +9,8 @@ target triple = "aarch64-linux-gnu"
define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i32 %x) #0 {
; CHECK-LABEL: define i64 @vector_loop_with_remaining_iterations(
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ITER_CHECK:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 17, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
@ -39,58 +34,12 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP11]])
; CHECK-NEXT: br label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ule i64 1, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK: [[VEC_EPILOG_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 17, [[TMP17]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 [[TMP17]], i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 17, [[TMP19]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = mul <vscale x 2 x i64> [[TMP25]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> [[BROADCAST_SPLAT4]], [[TMP26]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP17]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <vscale x 2 x i64> [ [[TMP22]], %[[VEC_EPILOG_PH]] ], [ [[TMP35:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], <vscale x 2 x i64> [[VEC_IND]], i32 0, i64 3
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 [[TMP28]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> poison)
; CHECK-NEXT: [[TMP29:%.*]] = zext <vscale x 2 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 2 x i32>
; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP29]])
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]])
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]]
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = zext <vscale x 2 x i32> [[TMP31]] to <vscale x 2 x i64>
; CHECK-NEXT: [[TMP35]] = or <vscale x 2 x i64> [[VEC_PHI8]], [[TMP34]]
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP17]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT6]]
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> [[TMP35]])
; CHECK-NEXT: br label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP37]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[TMP13]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC_I_I:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1
; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32
@ -104,7 +53,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
; CHECK-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ]
; CHECK-NEXT: ret i64 [[RED_NEXT_LCSSA]]
@ -140,13 +89,8 @@ exit:
define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr noalias %dst, i32 %x) #0 {
; CHECK-LABEL: define i64 @main_vector_loop_fixed_with_no_remaining_iterations(
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ITER_CHECK:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 17, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
@ -167,61 +111,15 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
; CHECK-NEXT: [[TMP11]] = or <16 x i64> [[VEC_PHI]], [[TMP10]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP11]])
; CHECK-NEXT: br label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ule i64 1, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: [[VEC_EPILOG_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 17, [[TMP17]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 [[TMP17]], i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 17, [[TMP19]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP38:%.*]] = mul <vscale x 2 x i64> [[TMP25]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> [[BROADCAST_SPLAT4]], [[TMP38]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP17]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <vscale x 2 x i64> [ [[TMP22]], %[[VEC_EPILOG_PH]] ], [ [[TMP35:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], <vscale x 2 x i64> [[VEC_IND]], i32 0, i64 3
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 [[TMP28]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> poison)
; CHECK-NEXT: [[TMP29:%.*]] = zext <vscale x 2 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 2 x i32>
; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP29]])
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]])
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]]
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = zext <vscale x 2 x i32> [[TMP31]] to <vscale x 2 x i64>
; CHECK-NEXT: [[TMP35]] = or <vscale x 2 x i64> [[VEC_PHI8]], [[TMP34]]
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP17]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT6]]
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> [[TMP35]])
; CHECK-NEXT: br label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP37]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[TMP13]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC_I_I:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1
; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32
@ -235,7 +133,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
; CHECK-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ]
; CHECK-NEXT: ret i64 [[RED_NEXT_LCSSA]]
@ -308,7 +206,7 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
; CHECK-NEXT: store i64 0, ptr [[L]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
@ -332,7 +230,7 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
; CHECK-NEXT: store i64 0, ptr [[L]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 2
; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV1]], 14
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;

View File

@ -9,15 +9,15 @@ define void @cost_store_i8(ptr %dst) #0 {
; DEFAULT-LABEL: define void @cost_store_i8(
; DEFAULT-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
; DEFAULT-NEXT: iter.check:
; DEFAULT-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP10]], 3
; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP13]]
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; DEFAULT: vector.main.loop.iter.check:
; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 101, [[TMP1]]
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; DEFAULT: vector.main.loop.iter.check:
; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 5
; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP10]]
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
@ -39,22 +39,22 @@ define void @cost_store_i8(ptr %dst) #0 {
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 101, [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; DEFAULT: vec.epilog.iter.check:
; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP13]]
; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP1]]
; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; DEFAULT: vec.epilog.ph:
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; DEFAULT-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 101, [[TMP12]]
; DEFAULT-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 4
; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 101, [[TMP15]]
; DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 101, [[N_MOD_VF2]]
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; DEFAULT: vec.epilog.vector.body:
; DEFAULT-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]]
; DEFAULT-NEXT: store <vscale x 8 x i8> zeroinitializer, ptr [[TMP9]], align 1
; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP12]]
; DEFAULT-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]]
; DEFAULT-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; DEFAULT-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX5]]
; DEFAULT-NEXT: store <vscale x 4 x i8> zeroinitializer, ptr [[TMP19]], align 1
; DEFAULT-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP15]]
; DEFAULT-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
; DEFAULT-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; DEFAULT: vec.epilog.middle.block:
; DEFAULT-NEXT: [[CMP_N6:%.*]] = icmp eq i64 101, [[N_VEC3]]
; DEFAULT-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@ -146,12 +146,12 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1, !alias.scope [[META9]], !noalias [[META6]]
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; DEFAULT-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
; DEFAULT-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; DEFAULT: middle.block:
; DEFAULT-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; DEFAULT: vec.epilog.iter.check:
; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]]
; DEFAULT: vec.epilog.ph:
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0
@ -165,11 +165,11 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; DEFAULT: vec.epilog.vector.body:
; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]]
; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META9]], !noalias [[META6]]
; DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]]
; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP13]], align 1, !alias.scope [[META9]], !noalias [[META6]]
; DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 8
; DEFAULT-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT9]], 1000
; DEFAULT-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; DEFAULT-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; DEFAULT: vec.epilog.middle.block:
; DEFAULT-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; DEFAULT: vec.epilog.scalar.ph:
@ -185,7 +185,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: store i8 [[TRUNC]], ptr [[GEP]], align 1
; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
; DEFAULT: exit:
; DEFAULT-NEXT: ret void
;
@ -268,7 +268,7 @@ attributes #1 = { vscale_range(1,16) "target-features"="+sve" }
; DEFAULT: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; DEFAULT: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; DEFAULT: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; DEFAULT: [[PROF3]] = !{!"branch_weights", i32 8, i32 24}
; DEFAULT: [[PROF3]] = !{!"branch_weights", i32 4, i32 28}
; DEFAULT: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; DEFAULT: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; DEFAULT: [[META6]] = !{[[META7:![0-9]+]]}
@ -277,8 +277,9 @@ attributes #1 = { vscale_range(1,16) "target-features"="+sve" }
; DEFAULT: [[META9]] = !{[[META10:![0-9]+]]}
; DEFAULT: [[META10]] = distinct !{[[META10]], [[META8]]}
; DEFAULT: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
; DEFAULT: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; DEFAULT: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
; DEFAULT: [[PROF12]] = !{!"branch_weights", i32 8, i32 24}
; DEFAULT: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
; DEFAULT: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]]}
;.
; PRED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; PRED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}