Bound ISL operations during pre-vectorization (#165204)
Bound ISL operations during pre-vectorization so that compilation cannot run indefinitely. The MaxOpGuard, previously used only for schedule computation, now also guards the pre-vectorization optimizations. This patch includes a reduced test case derived from the original bug report. Co-authored-by: Michael Kruse <llvm-project@meinersbur.de>
This commit is contained in:
parent
8bd7fc7211
commit
795fa9ea00
@ -237,6 +237,7 @@ struct OptimizerAdditionalInfoTy {
|
||||
bool Postopts;
|
||||
bool Prevect;
|
||||
bool &DepsChanged;
|
||||
IslMaxOperationsGuard &MaxOpGuard;
|
||||
};
|
||||
|
||||
class ScheduleTreeOptimizer final {
|
||||
@ -381,6 +382,8 @@ private:
|
||||
isl::schedule_node
|
||||
ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node,
|
||||
int VectorWidth) {
|
||||
if (Node.is_null())
|
||||
return {};
|
||||
assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
|
||||
Node = Node.child(0).child(0);
|
||||
isl::union_map SchedRelUMap = Node.get_prefix_schedule_relation();
|
||||
@ -391,6 +394,8 @@ ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node,
|
||||
isl::union_set IsolateOption = getIsolateOptions(IsolateDomain, 1);
|
||||
Node = Node.parent().parent();
|
||||
isl::union_set Options = IsolateOption.unite(AtomicOption);
|
||||
if (Node.is_null())
|
||||
return {};
|
||||
isl::schedule_node_band Result =
|
||||
Node.as<isl::schedule_node_band>().set_ast_build_options(Options);
|
||||
return Result;
|
||||
@ -411,9 +416,13 @@ struct InsertSimdMarkers final : ScheduleNodeRewriter<InsertSimdMarkers> {
|
||||
|
||||
isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand(
|
||||
isl::schedule_node Node, unsigned DimToVectorize, int VectorWidth) {
|
||||
if (Node.is_null())
|
||||
return {};
|
||||
assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
|
||||
|
||||
auto Space = isl::manage(isl_schedule_node_band_get_space(Node.get()));
|
||||
if (Space.is_null())
|
||||
return {};
|
||||
unsigned ScheduleDimensions = unsignedFromIslSize(Space.dim(isl::dim::set));
|
||||
assert(DimToVectorize < ScheduleDimensions);
|
||||
|
||||
@ -439,12 +448,15 @@ isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand(
|
||||
// Sink the inner loop into the smallest possible statements to make them
|
||||
// represent a single vector instruction if possible.
|
||||
Node = isl::manage(isl_schedule_node_band_sink(Node.release()));
|
||||
if (Node.is_null())
|
||||
return {};
|
||||
|
||||
// Add SIMD markers to those vector statements.
|
||||
InsertSimdMarkers SimdMarkerInserter;
|
||||
Node = SimdMarkerInserter.visit(Node);
|
||||
|
||||
PrevectOpts++;
|
||||
if (!Node.is_null())
|
||||
PrevectOpts++;
|
||||
return Node.parent();
|
||||
}
|
||||
|
||||
@ -535,6 +547,8 @@ ScheduleTreeOptimizer::applyTileBandOpt(isl::schedule_node Node) {
|
||||
isl::schedule_node
|
||||
ScheduleTreeOptimizer::applyPrevectBandOpt(isl::schedule_node Node) {
|
||||
auto Space = isl::manage(isl_schedule_node_band_get_space(Node.get()));
|
||||
if (Space.is_null())
|
||||
return {};
|
||||
int Dims = unsignedFromIslSize(Space.dim(isl::dim::set));
|
||||
|
||||
for (int i = Dims - 1; i >= 0; i--)
|
||||
@ -572,9 +586,14 @@ ScheduleTreeOptimizer::optimizeBand(__isl_take isl_schedule_node *NodeArg,
|
||||
Node = applyTileBandOpt(Node);
|
||||
|
||||
if (OAI->Prevect) {
|
||||
IslQuotaScope MaxScope = OAI->MaxOpGuard.enter();
|
||||
|
||||
// FIXME: Prevectorization requirements are different from those checked by
|
||||
// isTileableBandNode.
|
||||
Node = applyPrevectBandOpt(Node);
|
||||
|
||||
if (OAI->MaxOpGuard.hasQuotaExceeded() || Node.is_null())
|
||||
return (isl::schedule_node()).release();
|
||||
}
|
||||
|
||||
return Node.release();
|
||||
@ -771,6 +790,10 @@ static void runIslScheduleOptimizer(
|
||||
return;
|
||||
}
|
||||
|
||||
isl_ctx *Ctx = S.getIslCtx().get();
|
||||
IslMaxOperationsGuard MaxOpGuard(Ctx, ScheduleComputeOut,
|
||||
/*AutoEnter=*/false);
|
||||
|
||||
// Apply ISL's algorithm only if not overridden by the user. Note that
|
||||
// post-rescheduling optimizations (tiling, pattern-based, prevectorization)
|
||||
// rely on the coincidence/permutable annotations on schedule tree bands that
|
||||
@ -853,8 +876,6 @@ static void runIslScheduleOptimizer(
|
||||
IslOuterCoincidence = 0;
|
||||
}
|
||||
|
||||
isl_ctx *Ctx = S.getIslCtx().get();
|
||||
|
||||
isl_options_set_schedule_outer_coincidence(Ctx, IslOuterCoincidence);
|
||||
isl_options_set_schedule_maximize_band_depth(Ctx, IslMaximizeBands);
|
||||
isl_options_set_schedule_max_constant_term(Ctx, MaxConstantTerm);
|
||||
@ -870,28 +891,20 @@ static void runIslScheduleOptimizer(
|
||||
SC = SC.set_coincidence(Validity);
|
||||
|
||||
{
|
||||
IslMaxOperationsGuard MaxOpGuard(Ctx, ScheduleComputeOut);
|
||||
IslQuotaScope MaxOpScope = MaxOpGuard.enter();
|
||||
Schedule = SC.compute_schedule();
|
||||
|
||||
if (MaxOpGuard.hasQuotaExceeded())
|
||||
POLLY_DEBUG(
|
||||
dbgs() << "Schedule optimizer calculation exceeds ISL quota\n");
|
||||
}
|
||||
|
||||
isl_options_set_on_error(Ctx, OnErrorStatus);
|
||||
|
||||
ScopsRescheduled++;
|
||||
if (!Schedule.is_null())
|
||||
ScopsRescheduled++;
|
||||
POLLY_DEBUG(printSchedule(dbgs(), Schedule, "After rescheduling"));
|
||||
}
|
||||
|
||||
walkScheduleTreeForStatistics(Schedule, 1);
|
||||
|
||||
// In cases the scheduler is not able to optimize the code, we just do not
|
||||
// touch the schedule.
|
||||
if (Schedule.is_null())
|
||||
return;
|
||||
|
||||
if (GreedyFusion) {
|
||||
if (GreedyFusion && !Schedule.is_null()) {
|
||||
isl::union_map Validity = D.getDependences(
|
||||
Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW);
|
||||
Schedule = applyGreedyFusion(Schedule, Validity);
|
||||
@ -905,14 +918,20 @@ static void runIslScheduleOptimizer(
|
||||
/*PatternOpts=*/!HasUserTransformation && PMBasedOpts,
|
||||
/*Postopts=*/!HasUserTransformation && EnablePostopts,
|
||||
/*Prevect=*/PollyVectorizerChoice != VECTORIZER_NONE,
|
||||
DepsChanged};
|
||||
if (OAI.PatternOpts || OAI.Postopts || OAI.Prevect) {
|
||||
DepsChanged,
|
||||
MaxOpGuard};
|
||||
if (!Schedule.is_null() && (OAI.PatternOpts || OAI.Postopts || OAI.Prevect)) {
|
||||
Schedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule, &OAI);
|
||||
Schedule = hoistExtensionNodes(Schedule);
|
||||
POLLY_DEBUG(printSchedule(dbgs(), Schedule, "After post-optimizations"));
|
||||
walkScheduleTreeForStatistics(Schedule, 2);
|
||||
}
|
||||
|
||||
if (MaxOpGuard.hasQuotaExceeded()) {
|
||||
POLLY_DEBUG(dbgs() << "Schedule optimizer calculation exceeds ISL quota\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip profitability check if user transformation(s) have been applied.
|
||||
if (!HasUserTransformation &&
|
||||
!ScheduleTreeOptimizer::isProfitableSchedule(S, Schedule))
|
||||
|
||||
@ -972,6 +972,9 @@ BandAttr *polly::getBandAttr(isl::schedule_node MarkOrBand) {
|
||||
}
|
||||
|
||||
isl::schedule polly::hoistExtensionNodes(isl::schedule Sched) {
|
||||
if (Sched.is_null())
|
||||
return {};
|
||||
|
||||
// If there is no extension node in the first place, return the original
|
||||
// schedule tree.
|
||||
if (!containsExtensionNode(Sched))
|
||||
@ -1126,6 +1129,8 @@ isl::set polly::getPartialTilePrefixes(isl::set ScheduleRange,
|
||||
|
||||
isl::union_set polly::getIsolateOptions(isl::set IsolateDomain,
|
||||
unsigned OutDimsNum) {
|
||||
if (IsolateDomain.is_null())
|
||||
return {};
|
||||
unsigned Dims = unsignedFromIslSize(IsolateDomain.tuple_dim());
|
||||
assert(OutDimsNum <= Dims &&
|
||||
"The isl::set IsolateDomain is used to describe the range of schedule "
|
||||
|
||||
37
polly/test/ScheduleOptimizer/prevectorization_islbound.ll
Normal file
37
polly/test/ScheduleOptimizer/prevectorization_islbound.ll
Normal file
@ -0,0 +1,37 @@
|
||||
; RUN: opt %loadNPMPolly -S -polly-vectorizer=stripmine -passes=polly-opt-isl -polly-debug -disable-output < %s 2>&1 | FileCheck %s
|
||||
; REQUIRES: asserts
|
||||
|
||||
; Reduced reproducer consisting of a two-level loop nest:
;   - outer loop: %bb7 -> %bb8 -> %bb11 -> %bb7, counted by %phi and left once
;     %phi equals %arg2;
;   - inner loop: %bb8 branching back to itself, counted by %phi9.
; Every array access is a GEP into [7 x float] off a null base pointer.
; The CHECK line that follows this function expects the
; "Schedule optimizer calculation exceeds ISL quota" debug message.
define void @ham(ptr %arg, ptr %arg1, i32 %arg2, i32 %arg3, ptr %arg4, i32 %arg5, i32 %arg6) {
|
||||
bb:
|
||||
; Address of element %arg3; computed once here and loaded in %bb11.
%getelementptr = getelementptr [7 x float], ptr null, i32 0, i32 %arg3
|
||||
br label %bb7
|
||||
|
||||
; Outer loop header.
bb7: ; preds = %bb11, %bb
|
||||
%phi = phi i32 [ 0, %bb ], [ %add16, %bb11 ]
|
||||
br label %bb8
|
||||
|
||||
; Inner loop: stores 0.0 to element %phi9 and repeats only while %phi9 == 0.
bb8: ; preds = %bb8, %bb7
|
||||
%phi9 = phi i32 [ 0, %bb7 ], [ %add, %bb8 ]
|
||||
%getelementptr10 = getelementptr [7 x float], ptr null, i32 0, i32 %phi9
|
||||
store float 0.000000e+00, ptr %getelementptr10, align 4
|
||||
%add = add i32 %phi9, 1
|
||||
%icmp = icmp eq i32 %phi9, 0
|
||||
br i1 %icmp, label %bb8, label %bb11
|
||||
|
||||
; Outer loop latch: copies elements %arg3, %arg5 and %arg6 to %arg4, %arg and
; %arg1 respectively, then advances %phi.
bb11: ; preds = %bb8
|
||||
%load = load float, ptr %getelementptr, align 4
|
||||
store float %load, ptr %arg4, align 4
|
||||
%getelementptr12 = getelementptr [7 x float], ptr null, i32 0, i32 %arg5
|
||||
%load13 = load float, ptr %getelementptr12, align 4
|
||||
store float %load13, ptr %arg, align 4
|
||||
%getelementptr14 = getelementptr [7 x float], ptr null, i32 0, i32 %arg6
|
||||
%load15 = load float, ptr %getelementptr14, align 4
|
||||
store float %load15, ptr %arg1, align 4
|
||||
%add16 = add i32 %phi, 1
|
||||
%icmp17 = icmp ne i32 %phi, %arg2
|
||||
br i1 %icmp17, label %bb7, label %bb18
|
||||
|
||||
bb18: ; preds = %bb11
|
||||
ret void
|
||||
}
|
||||
; CHECK:Schedule optimizer calculation exceeds ISL quota
|
||||
Loading…
x
Reference in New Issue
Block a user