[MLIR][OpenMP] Add omp.fuse operation (#168898)
This patch is a follow-up to #161213 and adds the omp.fuse loop transformation to the OpenMP dialect. It is used to lower `!$omp fuse` in Flang. Lowering and end-to-end tests are included.
This commit is contained in:
parent
7c1d517ebe
commit
f560e4cfb1
@ -259,6 +259,7 @@ bool ClauseProcessor::processCollapse(
|
||||
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const {
|
||||
|
||||
int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval,
|
||||
getNestedDoConstruct(eval),
|
||||
clauses, loopResult, iv);
|
||||
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
||||
collapseResult.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse);
|
||||
@ -518,6 +519,21 @@ bool ClauseProcessor::processSizes(StatementContext &stmtCtx,
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Process the LOOPRANGE clause of the current construct, if there is one.
///
/// When the clause is present, its first argument is stored in `result.first`
/// and its second argument in `result.count`, both as attributes created with
/// getI64IntegerAttr. The second argument is also written to `count`.
/// When the clause is absent, `result` and `count` are left untouched.
///
/// Both arguments are read with evaluate::ToInt64(...).value().
/// NOTE(review): this presumes earlier checks guarantee that both arguments
/// fold to integer constants -- confirm.
///
/// `stmtCtx` is not used by this function.
///
/// \returns true if a LOOPRANGE clause was found, false otherwise.
bool ClauseProcessor::processLooprange(StatementContext &stmtCtx,
                                       mlir::omp::LooprangeClauseOps &result,
                                       int64_t &count) const {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();

  auto *looprange = findUniqueClause<omp::clause::Looprange>();
  if (!looprange)
    return false;

  // The clause carries a tuple: element 0 is `first`, element 1 is `count`.
  int64_t firstValue = evaluate::ToInt64(std::get<0>(looprange->t)).value();
  count = evaluate::ToInt64(std::get<1>(looprange->t)).value();

  result.first = builder.getI64IntegerAttr(firstValue);
  result.count = builder.getI64IntegerAttr(count);
  return true;
}
|
||||
|
||||
bool ClauseProcessor::processNumTeams(
|
||||
lower::StatementContext &stmtCtx,
|
||||
mlir::omp::NumTeamsClauseOps &result) const {
|
||||
|
||||
@ -68,6 +68,9 @@ public:
|
||||
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const;
|
||||
bool processSizes(StatementContext &stmtCtx,
|
||||
mlir::omp::SizesClauseOps &result) const;
|
||||
bool processLooprange(StatementContext &stmtCtx,
|
||||
mlir::omp::LooprangeClauseOps &result,
|
||||
int64_t &count) const;
|
||||
bool processDevice(lower::StatementContext &stmtCtx,
|
||||
mlir::omp::DeviceClauseOps &result) const;
|
||||
bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const;
|
||||
|
||||
@ -347,7 +347,7 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
|
||||
mlir::omp::LoopRelatedClauseOps result;
|
||||
llvm::SmallVector<const semantics::Symbol *> iv;
|
||||
collectLoopRelatedInfo(converter, converter.getCurrentLocation(), eval,
|
||||
clauses, result, iv);
|
||||
getNestedDoConstruct(eval), clauses, result, iv);
|
||||
|
||||
// Update the original variable just before exiting the worksharing
|
||||
// loop. Conversion as follows:
|
||||
|
||||
@ -2039,12 +2039,27 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
|
||||
return loopOp;
|
||||
}
|
||||
|
||||
// `nestedEval` is the Evaluation of a child loop of `eval`.
|
||||
// In a regular OpenMP Construct Evaluation, `nestedEval` is the only child.
|
||||
// Can be retrieved with getNestedDoConstruct(Evaluation).
|
||||
// <<OpenMPConstruct>>
|
||||
// Loop
|
||||
// <<End OpenMPConstruct>>
|
||||
//
|
||||
// `nestedEval` is most useful in the case that `eval` contains a sequence
|
||||
// of loops. Then this function generates Canonical loop nests for individual
|
||||
// loops.
|
||||
// <<OpenMPConstruct>>
|
||||
// Loop 1
|
||||
// Loop 2
|
||||
// <<End OpenMPConstruct>>
|
||||
//
|
||||
static void genCanonicalLoopNest(
|
||||
lower::AbstractConverter &converter, lower::SymMap &symTable,
|
||||
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
|
||||
mlir::Location loc, const ConstructQueue &queue,
|
||||
ConstructQueue::const_iterator item, size_t numLoops,
|
||||
llvm::SmallVectorImpl<mlir::omp::CanonicalLoopOp> &loops) {
|
||||
lower::pft::Evaluation *nestedEval, mlir::Location loc,
|
||||
const ConstructQueue &queue, ConstructQueue::const_iterator item,
|
||||
size_t numLoops, llvm::SmallVectorImpl<mlir::omp::CanonicalLoopOp> &loops) {
|
||||
assert(loops.empty() && "Expecting empty list to fill");
|
||||
assert(numLoops >= 1 && "Expecting at least one loop");
|
||||
|
||||
@ -2052,7 +2067,8 @@ static void genCanonicalLoopNest(
|
||||
|
||||
mlir::omp::LoopRelatedClauseOps loopInfo;
|
||||
llvm::SmallVector<const semantics::Symbol *, 3> ivs;
|
||||
collectLoopRelatedInfo(converter, loc, eval, numLoops, loopInfo, ivs);
|
||||
collectLoopRelatedInfo(converter, loc, eval, nestedEval, numLoops, loopInfo,
|
||||
ivs);
|
||||
assert(ivs.size() == numLoops &&
|
||||
"Expected to parse as many loop variables as there are loops");
|
||||
|
||||
@ -2074,7 +2090,7 @@ static void genCanonicalLoopNest(
|
||||
|
||||
// Step 1: Loop prologues
|
||||
// Computing the trip count must happen before entering the outermost loop
|
||||
lower::pft::Evaluation *innermostEval = &eval.getFirstNestedEvaluation();
|
||||
lower::pft::Evaluation *innermostEval = nestedEval;
|
||||
for ([[maybe_unused]] auto iv : ivs) {
|
||||
if (innermostEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
|
||||
// OpenMP specifies DO CONCURRENT only with the `!omp loop` construct.
|
||||
@ -2246,8 +2262,9 @@ static void genTileOp(Fortran::lower::AbstractConverter &converter,
|
||||
llvm::SmallVector<mlir::omp::CanonicalLoopOp, 3> canonLoops;
|
||||
canonLoops.reserve(numLoops);
|
||||
|
||||
genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item,
|
||||
numLoops, canonLoops);
|
||||
genCanonicalLoopNest(converter, symTable, semaCtx, eval,
|
||||
getNestedDoConstruct(eval), loc, queue, item, numLoops,
|
||||
canonLoops);
|
||||
assert((canonLoops.size() == numLoops) &&
|
||||
"Expecting the predetermined number of loops");
|
||||
|
||||
@ -2277,6 +2294,50 @@ static void genTileOp(Fortran::lower::AbstractConverter &converter,
|
||||
sizesClause.sizes);
|
||||
}
|
||||
|
||||
/// Lower a FUSE construct to an `omp.fuse` operation.
///
/// Every nested evaluation of `eval` is visited in order, stopping at the last
/// one (the end directive) and skipping compiler directives. Each remaining
/// child is emitted as a canonical loop nest of depth one, and its CLI becomes
/// an applyee of the fuse operation.
///
/// Generatees are fresh CLIs created with omp.new_cli. Without a LOOPRANGE
/// clause there is exactly one. With the clause there are
/// `applyees.size() - count + 1`: one for the fused loop plus one for each
/// loop outside the specified range.
static void genFuseOp(Fortran::lower::AbstractConverter &converter,
                      Fortran::lower::SymMap &symTable,
                      lower::StatementContext &stmtCtx,
                      Fortran::semantics::SemanticsContext &semaCtx,
                      Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
                      const ConstructQueue &queue,
                      ConstructQueue::const_iterator item) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();

  // Pick up the optional LOOPRANGE clause; `count` stays 0 when it is absent.
  int64_t count = 0;
  mlir::omp::LooprangeClauseOps looprangeClause;
  ClauseProcessor cp(converter, semaCtx, item->clauses);
  bool hasLooprange = cp.processLooprange(stmtCtx, looprangeClause, count);

  // Emit one canonical loop per associated loop and remember its CLI.
  llvm::SmallVector<mlir::Value> applyees;
  for (auto &nested : eval.getNestedEvaluations()) {
    // The last nested evaluation is the OmpEndLoopDirective: stop there.
    if (&nested == &eval.getLastNestedEvaluation())
      break;
    // Compiler directives are skipped.
    if (nested.getIf<parser::CompilerDirective>())
      continue;

    llvm::SmallVector<mlir::omp::CanonicalLoopOp> loopNest;
    genCanonicalLoopNest(converter, symTable, semaCtx, eval, &nested, loc,
                         queue, item, 1, loopNest);

    auto loopCli = llvm::getSingleElement(loopNest).getCli();
    applyees.push_back(loopCli);
  }

  // One generated loop + one for each loop not inside the specified looprange
  // if present
  int64_t numGeneratees = 1;
  if (hasLooprange)
    numGeneratees = applyees.size() - count + 1;

  llvm::SmallVector<mlir::Value> generatees;
  while (static_cast<int64_t>(generatees.size()) < numGeneratees) {
    auto newCli = mlir::omp::NewCliOp::create(builder, loc);
    generatees.push_back(newCli);
  }

  mlir::omp::FuseOp::create(builder, loc, generatees, applyees,
                            looprangeClause.first, looprangeClause.count);
}
|
||||
|
||||
static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
|
||||
Fortran::lower::SymMap &symTable,
|
||||
lower::StatementContext &stmtCtx,
|
||||
@ -2293,7 +2354,8 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
|
||||
|
||||
// Emit the associated loop
|
||||
llvm::SmallVector<mlir::omp::CanonicalLoopOp, 1> canonLoops;
|
||||
genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item, 1,
|
||||
genCanonicalLoopNest(converter, symTable, semaCtx, eval,
|
||||
getNestedDoConstruct(eval), loc, queue, item, 1,
|
||||
canonLoops);
|
||||
|
||||
llvm::SmallVector<mlir::Value, 1> applyees;
|
||||
@ -3672,13 +3734,9 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
|
||||
case llvm::omp::Directive::OMPD_tile:
|
||||
genTileOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
|
||||
break;
|
||||
case llvm::omp::Directive::OMPD_fuse: {
|
||||
unsigned version = semaCtx.langOptions().OpenMPVersion;
|
||||
if (!semaCtx.langOptions().OpenMPSimd)
|
||||
TODO(loc, "Unhandled loop directive (" +
|
||||
llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
|
||||
case llvm::omp::Directive::OMPD_fuse:
|
||||
genFuseOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
|
||||
break;
|
||||
}
|
||||
case llvm::omp::Directive::OMPD_unroll:
|
||||
genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
|
||||
break;
|
||||
|
||||
@ -836,13 +836,14 @@ void collectTileSizesFromOpenMPConstruct(
|
||||
|
||||
int64_t collectLoopRelatedInfo(
|
||||
lower::AbstractConverter &converter, mlir::Location currentLocation,
|
||||
lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
|
||||
lower::pft::Evaluation &eval, lower::pft::Evaluation *nestedEval,
|
||||
const omp::List<omp::Clause> &clauses,
|
||||
mlir::omp::LoopRelatedClauseOps &result,
|
||||
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
|
||||
int64_t numCollapse = 1;
|
||||
|
||||
// Collect the loops to collapse.
|
||||
lower::pft::Evaluation *doConstructEval = getNestedDoConstruct(eval);
|
||||
lower::pft::Evaluation *doConstructEval = nestedEval;
|
||||
if (doConstructEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
|
||||
TODO(currentLocation, "Do Concurrent in Worksharing loop construct");
|
||||
}
|
||||
@ -854,21 +855,21 @@ int64_t collectLoopRelatedInfo(
|
||||
numCollapse = collapseValue;
|
||||
}
|
||||
|
||||
collectLoopRelatedInfo(converter, currentLocation, eval, numCollapse, result,
|
||||
iv);
|
||||
collectLoopRelatedInfo(converter, currentLocation, eval, nestedEval,
|
||||
numCollapse, result, iv);
|
||||
return numCollapse;
|
||||
}
|
||||
|
||||
void collectLoopRelatedInfo(
|
||||
lower::AbstractConverter &converter, mlir::Location currentLocation,
|
||||
lower::pft::Evaluation &eval, int64_t numCollapse,
|
||||
mlir::omp::LoopRelatedClauseOps &result,
|
||||
lower::pft::Evaluation &eval, lower::pft::Evaluation *nestedEval,
|
||||
int64_t numCollapse, mlir::omp::LoopRelatedClauseOps &result,
|
||||
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
|
||||
|
||||
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
||||
|
||||
// Collect the loops to collapse.
|
||||
lower::pft::Evaluation *doConstructEval = getNestedDoConstruct(eval);
|
||||
lower::pft::Evaluation *doConstructEval = nestedEval;
|
||||
if (doConstructEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
|
||||
TODO(currentLocation, "Do Concurrent in Worksharing loop construct");
|
||||
}
|
||||
|
||||
@ -171,13 +171,15 @@ pft::Evaluation *getNestedDoConstruct(pft::Evaluation &eval);
|
||||
|
||||
int64_t collectLoopRelatedInfo(
|
||||
lower::AbstractConverter &converter, mlir::Location currentLocation,
|
||||
lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
|
||||
lower::pft::Evaluation &eval, lower::pft::Evaluation *nestedEval,
|
||||
const omp::List<omp::Clause> &clauses,
|
||||
mlir::omp::LoopRelatedClauseOps &result,
|
||||
llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
|
||||
|
||||
void collectLoopRelatedInfo(
|
||||
lower::AbstractConverter &converter, mlir::Location currentLocation,
|
||||
lower::pft::Evaluation &eval, std::int64_t collapseValue,
|
||||
lower::pft::Evaluation &eval, lower::pft::Evaluation *nestedEval,
|
||||
std::int64_t collapseValue,
|
||||
// const omp::List<omp::Clause> &clauses,
|
||||
mlir::omp::LoopRelatedClauseOps &result,
|
||||
llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
|
||||
|
||||
93
flang/test/Lower/OpenMP/fuse01.f90
Normal file
93
flang/test/Lower/OpenMP/fuse01.f90
Normal file
@ -0,0 +1,93 @@
|
||||
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s | FileCheck %s
|
||||
|
||||
|
||||
subroutine omp_fuse01(lb1, ub1, inc1, lb2, ub2, inc2)
|
||||
! Lowering test input: a FUSE construct without clauses applied to a
! sequence of two loops. Each loop takes its bounds and step from the
! dummy arguments and stores its index into res.
integer res, i, j
|
||||
integer lb1, ub1, inc1
|
||||
integer lb2, ub2, inc2
|
||||
|
||||
!$omp fuse
|
||||
! First loop of the sequence.
do i = lb1, ub1, inc1
|
||||
res = i
|
||||
end do
|
||||
! Second loop of the sequence.
do j = lb2, ub2, inc2
|
||||
res = j
|
||||
end do
|
||||
!$omp end fuse
|
||||
|
||||
end subroutine omp_fuse01
|
||||
|
||||
|
||||
! CHECK-LABEL: func.func @_QPomp_fuse01(
|
||||
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb1"},
|
||||
! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub1"},
|
||||
! CHECK-SAME: %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc1"},
|
||||
! CHECK-SAME: %[[ARG3:.*]]: !fir.ref<i32> {fir.bindc_name = "lb2"},
|
||||
! CHECK-SAME: %[[ARG4:.*]]: !fir.ref<i32> {fir.bindc_name = "ub2"},
|
||||
! CHECK-SAME: %[[ARG5:.*]]: !fir.ref<i32> {fir.bindc_name = "inc2"}) {
|
||||
! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
|
||||
! CHECK: %[[ALLOCA_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_fuse01Ei"}
|
||||
! CHECK: %[[DECLARE_0:.*]]:2 = hlfir.declare %[[ALLOCA_0]] {uniq_name = "_QFomp_fuse01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_1:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Einc1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_2:.*]]:2 = hlfir.declare %[[ARG5]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Einc2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[ALLOCA_1:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFomp_fuse01Ej"}
|
||||
! CHECK: %[[DECLARE_3:.*]]:2 = hlfir.declare %[[ALLOCA_1]] {uniq_name = "_QFomp_fuse01Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Elb1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_5:.*]]:2 = hlfir.declare %[[ARG3]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Elb2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[ALLOCA_2:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_fuse01Eres"}
|
||||
! CHECK: %[[DECLARE_6:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {uniq_name = "_QFomp_fuse01Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Eub1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_8:.*]]:2 = hlfir.declare %[[ARG4]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Eub2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[LOAD_0:.*]] = fir.load %[[DECLARE_4]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DECLARE_7]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_2:.*]] = fir.load %[[DECLARE_1]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[CONSTANT_0:.*]] = arith.constant 0 : i32
|
||||
! CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[CMPI_0:.*]] = arith.cmpi slt, %[[LOAD_2]], %[[CONSTANT_0]] : i32
|
||||
! CHECK: %[[SUBI_0:.*]] = arith.subi %[[CONSTANT_0]], %[[LOAD_2]] : i32
|
||||
! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPI_0]], %[[SUBI_0]], %[[LOAD_2]] : i32
|
||||
! CHECK: %[[SELECT_1:.*]] = arith.select %[[CMPI_0]], %[[LOAD_1]], %[[LOAD_0]] : i32
|
||||
! CHECK: %[[SELECT_2:.*]] = arith.select %[[CMPI_0]], %[[LOAD_0]], %[[LOAD_1]] : i32
|
||||
! CHECK: %[[SUBI_1:.*]] = arith.subi %[[SELECT_2]], %[[SELECT_1]] overflow<nuw> : i32
|
||||
! CHECK: %[[DIVUI_0:.*]] = arith.divui %[[SUBI_1]], %[[SELECT_0]] : i32
|
||||
! CHECK: %[[ADDI_0:.*]] = arith.addi %[[DIVUI_0]], %[[CONSTANT_1]] overflow<nuw> : i32
|
||||
! CHECK: %[[CMPI_1:.*]] = arith.cmpi slt, %[[SELECT_2]], %[[SELECT_1]] : i32
|
||||
! CHECK: %[[SELECT_3:.*]] = arith.select %[[CMPI_1]], %[[CONSTANT_0]], %[[ADDI_0]] : i32
|
||||
! CHECK: %[[NEW_CLI_0:.*]] = omp.new_cli
|
||||
! CHECK: omp.canonical_loop(%[[NEW_CLI_0]]) %[[VAL_0:.*]] : i32 in range(%[[SELECT_3]]) {
|
||||
! CHECK: %[[MULI_0:.*]] = arith.muli %[[VAL_0]], %[[LOAD_2]] : i32
|
||||
! CHECK: %[[ADDI_1:.*]] = arith.addi %[[LOAD_0]], %[[MULI_0]] : i32
|
||||
! CHECK: hlfir.assign %[[ADDI_1]] to %[[DECLARE_0]]#0 : i32, !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_3:.*]] = fir.load %[[DECLARE_0]]#0 : !fir.ref<i32>
|
||||
! CHECK: hlfir.assign %[[LOAD_3]] to %[[DECLARE_6]]#0 : i32, !fir.ref<i32>
|
||||
! CHECK: omp.terminator
|
||||
! CHECK: }
|
||||
! CHECK: %[[LOAD_4:.*]] = fir.load %[[DECLARE_5]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_5:.*]] = fir.load %[[DECLARE_8]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_6:.*]] = fir.load %[[DECLARE_2]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[CONSTANT_2:.*]] = arith.constant 0 : i32
|
||||
! CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[CMPI_2:.*]] = arith.cmpi slt, %[[LOAD_6]], %[[CONSTANT_2]] : i32
|
||||
! CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONSTANT_2]], %[[LOAD_6]] : i32
|
||||
! CHECK: %[[SELECT_4:.*]] = arith.select %[[CMPI_2]], %[[SUBI_2]], %[[LOAD_6]] : i32
|
||||
! CHECK: %[[SELECT_5:.*]] = arith.select %[[CMPI_2]], %[[LOAD_5]], %[[LOAD_4]] : i32
|
||||
! CHECK: %[[SELECT_6:.*]] = arith.select %[[CMPI_2]], %[[LOAD_4]], %[[LOAD_5]] : i32
|
||||
! CHECK: %[[SUBI_3:.*]] = arith.subi %[[SELECT_6]], %[[SELECT_5]] overflow<nuw> : i32
|
||||
! CHECK: %[[DIVUI_1:.*]] = arith.divui %[[SUBI_3]], %[[SELECT_4]] : i32
|
||||
! CHECK: %[[ADDI_2:.*]] = arith.addi %[[DIVUI_1]], %[[CONSTANT_3]] overflow<nuw> : i32
|
||||
! CHECK: %[[CMPI_3:.*]] = arith.cmpi slt, %[[SELECT_6]], %[[SELECT_5]] : i32
|
||||
! CHECK: %[[SELECT_7:.*]] = arith.select %[[CMPI_3]], %[[CONSTANT_2]], %[[ADDI_2]] : i32
|
||||
! CHECK: %[[NEW_CLI_1:.*]] = omp.new_cli
|
||||
! CHECK: omp.canonical_loop(%[[NEW_CLI_1]]) %[[VAL_1:.*]] : i32 in range(%[[SELECT_7]]) {
|
||||
! CHECK: %[[MULI_1:.*]] = arith.muli %[[VAL_1]], %[[LOAD_6]] : i32
|
||||
! CHECK: %[[ADDI_3:.*]] = arith.addi %[[LOAD_4]], %[[MULI_1]] : i32
|
||||
! CHECK: hlfir.assign %[[ADDI_3]] to %[[DECLARE_3]]#0 : i32, !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_7:.*]] = fir.load %[[DECLARE_3]]#0 : !fir.ref<i32>
|
||||
! CHECK: hlfir.assign %[[LOAD_7]] to %[[DECLARE_6]]#0 : i32, !fir.ref<i32>
|
||||
! CHECK: omp.terminator
|
||||
! CHECK: }
|
||||
! CHECK: %[[NEW_CLI_2:.*]] = omp.new_cli
|
||||
! CHECK: omp.fuse (%[[NEW_CLI_2]]) <- (%[[NEW_CLI_0]], %[[NEW_CLI_1]])
|
||||
! CHECK: return
|
||||
! CHECK: }
|
||||
|
||||
123
flang/test/Lower/OpenMP/fuse02.f90
Normal file
123
flang/test/Lower/OpenMP/fuse02.f90
Normal file
@ -0,0 +1,123 @@
|
||||
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s | FileCheck %s
|
||||
|
||||
|
||||
subroutine omp_fuse02(lb1, ub1, inc1, lb2, ub2, inc2)
|
||||
! Lowering test input: a FUSE construct with looprange(2,2) applied to a
! sequence of three loops. Each loop stores its index into res.
integer res, i, j, k
|
||||
integer lb1, ub1, inc1
|
||||
integer lb2, ub2, inc2
|
||||
|
||||
!$omp fuse looprange(2,2)
|
||||
! First loop of the sequence.
do i = lb1, ub1, inc1
|
||||
res = i
|
||||
end do
|
||||
! Second loop of the sequence.
do j = lb2, ub2, inc2
|
||||
res = j
|
||||
end do
|
||||
! Third loop of the sequence; it runs from lb1 to ub2 with step inc1.
do k = lb1, ub2, inc1
|
||||
res = k
|
||||
end do
|
||||
!$omp end fuse
|
||||
|
||||
end subroutine omp_fuse02
|
||||
|
||||
|
||||
! CHECK-LABEL: func.func @_QPomp_fuse02(
|
||||
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb1"},
|
||||
! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub1"},
|
||||
! CHECK-SAME: %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc1"},
|
||||
! CHECK-SAME: %[[ARG3:.*]]: !fir.ref<i32> {fir.bindc_name = "lb2"},
|
||||
! CHECK-SAME: %[[ARG4:.*]]: !fir.ref<i32> {fir.bindc_name = "ub2"},
|
||||
! CHECK-SAME: %[[ARG5:.*]]: !fir.ref<i32> {fir.bindc_name = "inc2"}) {
|
||||
! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
|
||||
! CHECK: %[[ALLOCA_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_fuse02Ei"}
|
||||
! CHECK: %[[DECLARE_0:.*]]:2 = hlfir.declare %[[ALLOCA_0]] {uniq_name = "_QFomp_fuse02Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_1:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Einc1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_2:.*]]:2 = hlfir.declare %[[ARG5]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Einc2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[ALLOCA_1:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFomp_fuse02Ej"}
|
||||
! CHECK: %[[DECLARE_3:.*]]:2 = hlfir.declare %[[ALLOCA_1]] {uniq_name = "_QFomp_fuse02Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[ALLOCA_2:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFomp_fuse02Ek"}
|
||||
! CHECK: %[[DECLARE_4:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {uniq_name = "_QFomp_fuse02Ek"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Elb1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_6:.*]]:2 = hlfir.declare %[[ARG3]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Elb2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[ALLOCA_3:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_fuse02Eres"}
|
||||
! CHECK: %[[DECLARE_7:.*]]:2 = hlfir.declare %[[ALLOCA_3]] {uniq_name = "_QFomp_fuse02Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_8:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Eub1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[DECLARE_9:.*]]:2 = hlfir.declare %[[ARG4]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Eub2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[LOAD_0:.*]] = fir.load %[[DECLARE_5]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DECLARE_8]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_2:.*]] = fir.load %[[DECLARE_1]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[CONSTANT_0:.*]] = arith.constant 0 : i32
|
||||
! CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[CMPI_0:.*]] = arith.cmpi slt, %[[LOAD_2]], %[[CONSTANT_0]] : i32
|
||||
! CHECK: %[[SUBI_0:.*]] = arith.subi %[[CONSTANT_0]], %[[LOAD_2]] : i32
|
||||
! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPI_0]], %[[SUBI_0]], %[[LOAD_2]] : i32
|
||||
! CHECK: %[[SELECT_1:.*]] = arith.select %[[CMPI_0]], %[[LOAD_1]], %[[LOAD_0]] : i32
|
||||
! CHECK: %[[SELECT_2:.*]] = arith.select %[[CMPI_0]], %[[LOAD_0]], %[[LOAD_1]] : i32
|
||||
! CHECK: %[[SUBI_1:.*]] = arith.subi %[[SELECT_2]], %[[SELECT_1]] overflow<nuw> : i32
|
||||
! CHECK: %[[DIVUI_0:.*]] = arith.divui %[[SUBI_1]], %[[SELECT_0]] : i32
|
||||
! CHECK: %[[ADDI_0:.*]] = arith.addi %[[DIVUI_0]], %[[CONSTANT_1]] overflow<nuw> : i32
|
||||
! CHECK: %[[CMPI_1:.*]] = arith.cmpi slt, %[[SELECT_2]], %[[SELECT_1]] : i32
|
||||
! CHECK: %[[SELECT_3:.*]] = arith.select %[[CMPI_1]], %[[CONSTANT_0]], %[[ADDI_0]] : i32
|
||||
! CHECK: %[[NEW_CLI_0:.*]] = omp.new_cli
|
||||
! CHECK: omp.canonical_loop(%[[NEW_CLI_0]]) %[[VAL_0:.*]] : i32 in range(%[[SELECT_3]]) {
|
||||
! CHECK: %[[MULI_0:.*]] = arith.muli %[[VAL_0]], %[[LOAD_2]] : i32
|
||||
! CHECK: %[[ADDI_1:.*]] = arith.addi %[[LOAD_0]], %[[MULI_0]] : i32
|
||||
! CHECK: hlfir.assign %[[ADDI_1]] to %[[DECLARE_0]]#0 : i32, !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_3:.*]] = fir.load %[[DECLARE_0]]#0 : !fir.ref<i32>
|
||||
! CHECK: hlfir.assign %[[LOAD_3]] to %[[DECLARE_7]]#0 : i32, !fir.ref<i32>
|
||||
! CHECK: omp.terminator
|
||||
! CHECK: }
|
||||
! CHECK: %[[LOAD_4:.*]] = fir.load %[[DECLARE_6]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_5:.*]] = fir.load %[[DECLARE_9]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_6:.*]] = fir.load %[[DECLARE_2]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[CONSTANT_2:.*]] = arith.constant 0 : i32
|
||||
! CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[CMPI_2:.*]] = arith.cmpi slt, %[[LOAD_6]], %[[CONSTANT_2]] : i32
|
||||
! CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONSTANT_2]], %[[LOAD_6]] : i32
|
||||
! CHECK: %[[SELECT_4:.*]] = arith.select %[[CMPI_2]], %[[SUBI_2]], %[[LOAD_6]] : i32
|
||||
! CHECK: %[[SELECT_5:.*]] = arith.select %[[CMPI_2]], %[[LOAD_5]], %[[LOAD_4]] : i32
|
||||
! CHECK: %[[SELECT_6:.*]] = arith.select %[[CMPI_2]], %[[LOAD_4]], %[[LOAD_5]] : i32
|
||||
! CHECK: %[[SUBI_3:.*]] = arith.subi %[[SELECT_6]], %[[SELECT_5]] overflow<nuw> : i32
|
||||
! CHECK: %[[DIVUI_1:.*]] = arith.divui %[[SUBI_3]], %[[SELECT_4]] : i32
|
||||
! CHECK: %[[ADDI_2:.*]] = arith.addi %[[DIVUI_1]], %[[CONSTANT_3]] overflow<nuw> : i32
|
||||
! CHECK: %[[CMPI_3:.*]] = arith.cmpi slt, %[[SELECT_6]], %[[SELECT_5]] : i32
|
||||
! CHECK: %[[SELECT_7:.*]] = arith.select %[[CMPI_3]], %[[CONSTANT_2]], %[[ADDI_2]] : i32
|
||||
! CHECK: %[[NEW_CLI_1:.*]] = omp.new_cli
|
||||
! CHECK: omp.canonical_loop(%[[NEW_CLI_1]]) %[[VAL_1:.*]] : i32 in range(%[[SELECT_7]]) {
|
||||
! CHECK: %[[MULI_1:.*]] = arith.muli %[[VAL_1]], %[[LOAD_6]] : i32
|
||||
! CHECK: %[[ADDI_3:.*]] = arith.addi %[[LOAD_4]], %[[MULI_1]] : i32
|
||||
! CHECK: hlfir.assign %[[ADDI_3]] to %[[DECLARE_3]]#0 : i32, !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_7:.*]] = fir.load %[[DECLARE_3]]#0 : !fir.ref<i32>
|
||||
! CHECK: hlfir.assign %[[LOAD_7]] to %[[DECLARE_7]]#0 : i32, !fir.ref<i32>
|
||||
! CHECK: omp.terminator
|
||||
! CHECK: }
|
||||
! CHECK: %[[LOAD_8:.*]] = fir.load %[[DECLARE_5]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_9:.*]] = fir.load %[[DECLARE_9]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_10:.*]] = fir.load %[[DECLARE_1]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[CONSTANT_4:.*]] = arith.constant 0 : i32
|
||||
! CHECK: %[[CONSTANT_5:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[CMPI_4:.*]] = arith.cmpi slt, %[[LOAD_10]], %[[CONSTANT_4]] : i32
|
||||
! CHECK: %[[SUBI_4:.*]] = arith.subi %[[CONSTANT_4]], %[[LOAD_10]] : i32
|
||||
! CHECK: %[[SELECT_8:.*]] = arith.select %[[CMPI_4]], %[[SUBI_4]], %[[LOAD_10]] : i32
|
||||
! CHECK: %[[SELECT_9:.*]] = arith.select %[[CMPI_4]], %[[LOAD_9]], %[[LOAD_8]] : i32
|
||||
! CHECK: %[[SELECT_10:.*]] = arith.select %[[CMPI_4]], %[[LOAD_8]], %[[LOAD_9]] : i32
|
||||
! CHECK: %[[SUBI_5:.*]] = arith.subi %[[SELECT_10]], %[[SELECT_9]] overflow<nuw> : i32
|
||||
! CHECK: %[[DIVUI_2:.*]] = arith.divui %[[SUBI_5]], %[[SELECT_8]] : i32
|
||||
! CHECK: %[[ADDI_4:.*]] = arith.addi %[[DIVUI_2]], %[[CONSTANT_5]] overflow<nuw> : i32
|
||||
! CHECK: %[[CMPI_5:.*]] = arith.cmpi slt, %[[SELECT_10]], %[[SELECT_9]] : i32
|
||||
! CHECK: %[[SELECT_11:.*]] = arith.select %[[CMPI_5]], %[[CONSTANT_4]], %[[ADDI_4]] : i32
|
||||
! CHECK: %[[NEW_CLI_2:.*]] = omp.new_cli
|
||||
! CHECK: omp.canonical_loop(%[[NEW_CLI_2]]) %[[VAL_2:.*]] : i32 in range(%[[SELECT_11]]) {
|
||||
! CHECK: %[[MULI_2:.*]] = arith.muli %[[VAL_2]], %[[LOAD_10]] : i32
|
||||
! CHECK: %[[ADDI_5:.*]] = arith.addi %[[LOAD_8]], %[[MULI_2]] : i32
|
||||
! CHECK: hlfir.assign %[[ADDI_5]] to %[[DECLARE_4]]#0 : i32, !fir.ref<i32>
|
||||
! CHECK: %[[LOAD_11:.*]] = fir.load %[[DECLARE_4]]#0 : !fir.ref<i32>
|
||||
! CHECK: hlfir.assign %[[LOAD_11]] to %[[DECLARE_7]]#0 : i32, !fir.ref<i32>
|
||||
! CHECK: omp.terminator
|
||||
! CHECK: }
|
||||
! CHECK: %[[NEW_CLI_3:.*]] = omp.new_cli
|
||||
! CHECK: %[[NEW_CLI_4:.*]] = omp.new_cli
|
||||
! CHECK: omp.fuse (%[[NEW_CLI_3]], %[[NEW_CLI_4]]) <- (%[[NEW_CLI_0]], %[[NEW_CLI_1]], %[[NEW_CLI_2]]) looprange(first = 2, count = 2)
|
||||
! CHECK: return
|
||||
! CHECK: }
|
||||
|
||||
@ -1370,6 +1370,59 @@ public:
|
||||
tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
|
||||
ArrayRef<Value *> TileSizes);
|
||||
|
||||
/// Fuse a sequence of loops.
|
||||
///
|
||||
/// Fuses the loops of \p Loops.
|
||||
/// The merging of the loops is done in the following structure:
|
||||
///
|
||||
/// Example:
|
||||
/// \code
|
||||
/// for (int i = lb0; i < ub0; i += st0) // trip count is calculated as:
|
||||
/// body(i) // tc0 = (ub0 - lb0 + st0) / st0
|
||||
/// for (int j = lb1; j < ub1; j += st1)
|
||||
/// body(j);
|
||||
///
|
||||
/// ...
|
||||
///
|
||||
/// for (int k = lbk; k < ubk; k += stk)
|
||||
/// body(k);
|
||||
/// \endcode
|
||||
///
|
||||
/// After fusing the loops a single loop is left:
|
||||
/// \code
|
||||
/// for (fuse.index = 0; fuse.index < max(tc0, tc1, ... tck); ++fuse.index) {
|
||||
/// if (fuse.index < tc0){
|
||||
/// iv0 = lb0 + st0 * fuse.index;
|
||||
/// original.index0 = iv0
|
||||
/// body(0);
|
||||
/// }
|
||||
/// if (fuse.index < tc1){
|
||||
/// iv1 = lb1 + st1 * fuse.index;
|
||||
/// original.index1 = iv1
|
||||
/// body(1);
|
||||
/// }
|
||||
///
|
||||
/// ...
|
||||
///
|
||||
/// if (fuse.index < tck){
|
||||
/// ivk = lbk + stk * fuse.index;
|
||||
/// original.indexk = ivk
|
||||
/// body(k);
|
||||
/// }
|
||||
/// }
|
||||
/// \endcode
|
||||
///
|
||||
///
|
||||
/// @param DL Debug location for instructions added by fusion.
|
||||
///
|
||||
/// @param Loops Loops to fuse. The CanonicalLoopInfo objects are
|
||||
/// invalidated by this method, i.e. should not be used after
|
||||
/// fusion.
|
||||
///
|
||||
/// \returns A single loop generated by the loop fusion
|
||||
LLVM_ABI CanonicalLoopInfo *fuseLoops(DebugLoc DL,
|
||||
ArrayRef<CanonicalLoopInfo *> Loops);
|
||||
|
||||
/// Fully unroll a loop.
|
||||
///
|
||||
/// Instead of unrolling the loop immediately (and duplicating its body
|
||||
|
||||
@ -6636,6 +6636,116 @@ static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup,
|
||||
}
|
||||
}
|
||||
|
||||
/// Fuse the canonical loops in \p Loops into one canonical loop.
///
/// The fused loop counts from 0 up to the largest trip count among the input
/// loops. Inside its body every original loop body is preceded by a guard
/// block that only enters the body while the fused induction variable is below
/// that loop's own trip count. All input CanonicalLoopInfo objects are
/// invalidated before returning; the returned loop replaces them.
CanonicalLoopInfo *
OpenMPIRBuilder::fuseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops) {
  CanonicalLoopInfo *Head = Loops.front();
  CanonicalLoopInfo *Tail = Loops.back();
  Function *Fn = Head->getPreheader()->getParent();
  const size_t NumLoops = Loops.size();

  // The control blocks of the input loops become unreachable once the bodies
  // are rewired; remember them so they can be deleted at the end.
  SmallVector<BasicBlock *> StaleControlBBs;
  for (CanonicalLoopInfo *CLI : Loops)
    CLI->collectControlBlocks(StaleControlBBs);

  // Capture every trip count while the input loops are still intact.
  SmallVector<Value *> TripCounts;
  for (CanonicalLoopInfo *CLI : Loops) {
    assert(CLI->isValid() && "All input loops must be valid canonical loops");
    TripCounts.push_back(CLI->getTripCount());
  }

  Builder.SetCurrentDebugLocation(DL);

  // Dedicated block computing max(TripCounts), which becomes the trip count of
  // the fused loop.
  // NOTE(review): the comparisons below (and the per-body guards further down)
  // are signed, while the lowering tests show the generated loop condition as
  // `icmp ult` -- confirm trip counts never exceed the signed range.
  BasicBlock *TCBlock = BasicBlock::Create(Fn->getContext(), "omp.fuse.comp.tc",
                                           Fn, Head->getHeader());
  Builder.SetInsertPoint(TCBlock);
  Value *MaxTripCount = TripCounts.front();
  for (Value *TC : ArrayRef<Value *>(TripCounts).drop_front()) {
    Value *IsLarger = Builder.CreateICmpSGT(MaxTripCount, TC);
    MaxTripCount =
        Builder.CreateSelect(IsLarger, MaxTripCount, TC, ".omp.fuse.tc");
  }

  // Skeleton of the fused loop, spanning from the first body to the last latch.
  CanonicalLoopInfo *Fused = createLoopSkeleton(
      DL, MaxTripCount, Fn, Head->getBody(), Tail->getLatch(), "fused");

  // Preheader and After blocks are not part of a CLI, but they are used to
  // compute the individual trip counts, so they must execute before the fused
  // loop. They are also moved up in the function purely for readability.
  for (CanonicalLoopInfo *CLI : Loops.drop_back()) {
    CLI->getPreheader()->moveBefore(TCBlock);
    CLI->getAfter()->moveBefore(TCBlock);
  }
  Tail->getPreheader()->moveBefore(TCBlock);

  // Chain: preheader(i) -> after(i) -> preheader(i+1) -> ... -> TCBlock ->
  // fused loop -> after(last).
  for (size_t Idx = 0; Idx + 1 < NumLoops; ++Idx) {
    redirectTo(Loops[Idx]->getPreheader(), Loops[Idx]->getAfter(), DL);
    redirectTo(Loops[Idx]->getAfter(), Loops[Idx + 1]->getPreheader(), DL);
  }
  redirectTo(Tail->getPreheader(), TCBlock, DL);
  redirectTo(TCBlock, Fused->getPreheader(), DL);
  redirectTo(Fused->getAfter(), Tail->getAfter(), DL);

  // One guard block per original loop, placed right before that loop's body,
  // comparing the fused induction variable with the loop's own trip count.
  SmallVector<BasicBlock *> GuardBBs;
  SmallVector<Value *> GuardConds;
  for (size_t Idx = 0; Idx != NumLoops; ++Idx) {
    BasicBlock *Guard = BasicBlock::Create(
        Fn->getContext(), "omp.fused.inner.cond", Fn, Loops[Idx]->getBody());
    Builder.SetInsertPoint(Guard);
    Value *InRange =
        Builder.CreateICmpSLT(Fused->getIndVar(), TripCounts[Idx]);
    GuardBBs.push_back(Guard);
    GuardConds.push_back(InRange);
  }

  // Wire the guards and bodies together: a guard either enters its body or
  // skips to the next guard, and each body falls through to the next guard.
  // After the last loop, control continues at the latch of the fused loop.
  redirectTo(Fused->getBody(), GuardBBs.front(), DL);
  for (size_t Idx = 0; Idx != NumLoops; ++Idx) {
    BasicBlock *Next =
        Idx + 1 < NumLoops ? GuardBBs[Idx + 1] : Fused->getLatch();
    Builder.SetInsertPoint(GuardBBs[Idx]);
    Builder.CreateCondBr(GuardConds[Idx], Loops[Idx]->getBody(), Next);
    redirectAllPredecessorsTo(Loops[Idx]->getLatch(), Next, DL);
    // Replace the IV with the fused IV
    Loops[Idx]->getIndVar()->replaceAllUsesWith(Fused->getIndVar());
  }

  // The loop latch must have only one predecessor. At this point it is
  // branched to from both the last guard block and the last loop body, so
  // split off a common pre-latch block.
  Fused->getLatch()->splitBasicBlockBefore(Fused->getLatch()->begin(),
                                           "omp.fused.pre_latch");

  // Drop the now-orphaned control blocks of the input loops.
  removeUnusedBlocksFromParent(StaleControlBBs);

  // The input CLIs no longer describe existing loops.
  for (CanonicalLoopInfo *CLI : Loops)
    CLI->invalidate();

#ifndef NDEBUG
  Fused->assertOK();
#endif
  return Fused;
}
|
||||
|
||||
void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
|
||||
LLVMContext &Ctx = Builder.getContext();
|
||||
addLoopMetadata(
|
||||
|
||||
@ -1120,6 +1120,32 @@ class OpenMP_SizesClauseSkip<
|
||||
|
||||
def OpenMP_SizesClause : OpenMP_SizesClauseSkip<>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// V6.0 `looprange` clause
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Clause definition for `looprange(first = ..., count = ...)`. Both values are
// stored as optional 64-bit integer attributes on the operation that includes
// the clause.
class OpenMP_LooprangeClauseSkip<
    bit traits = false, bit arguments = false, bit assemblyFormat = false,
    bit description = false, bit extraClassDeclaration = false>
  : OpenMP_Clause<traits, arguments, assemblyFormat, description,
                  extraClassDeclaration> {
  let arguments = (ins OptionalAttr<I64Attr>:$first,
                       OptionalAttr<I64Attr>:$count);

  // The custom syntax always spells out both values together.
  let optAssemblyFormat = [{
    `looprange` `(` `first` `=` $first `,` `count` `=` $count `)`
  }];

  let description = [{
    The `looprange` clause contains a range that represents the loops affected
    by a loop fusion. The `first` attribute is the first loop of the sequence
    that will be affected and the `count` attribute is the number of loops that
    are affected by the loop fusion.
  }];
}

def OpenMP_LooprangeClause : OpenMP_LooprangeClauseSkip<>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// V5.2: [10.1.2] `num_threads` clause
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -550,6 +550,33 @@ def TileOp : OpenMPTransformBase_Op<"tile",
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// OpenMP fuse operation
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Loop-transformation op for `fuse`. Its CLI operands are the generatees
// followed by the applyees; the optional `looprange` clause restricts which
// applyees are fused.
def FuseOp
    : OpenMPTransformBase_Op<"fuse", clauses = [OpenMP_LooprangeClause]> {
  let summary = "OpenMP fuse operation";
  let description = [{
    Represents the OpenMP fuse directive introduced in OpenMP 6.0.

    The construct takes a loop sequence and merges the loops specified by the
    `looprange` clause and generates a loop sequence with the loops before the
    `first` attribute untouched, the generated fused loop, and the loops after
    the `first` + `count` attributes untouched, maintaining the original
    order. If the `looprange` clause is not present all the loops in the
    sequence are fused generating a single loop.
    Each logical iteration of the fused loop executes a logical iteration of
    each affected loop. The fused loop has the number of logical iterations
    equal to the affected loop with most logical iterations.

    The `first` and `count` attributes of the `looprange` clause are constant
    and known beforehand if present.
  }]#clausesDescription;

  let hasVerifier = 1;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// 2.8.3 Workshare Construct
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -3455,6 +3455,15 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
|
||||
.Case([&](UnrollHeuristicOp op) -> std::string {
|
||||
llvm_unreachable("heuristic unrolling does not generate a loop");
|
||||
})
|
||||
.Case([&](FuseOp op) -> std::string {
|
||||
unsigned opnum = generator->getOperandNumber();
|
||||
// The position of the first loop to be fused is the same position
|
||||
// as the resulting fused loop
|
||||
if (op.getFirst().has_value() && opnum != op.getFirst().value())
|
||||
return "canonloop_fuse";
|
||||
else
|
||||
return "fused";
|
||||
})
|
||||
.Case([&](TileOp op) -> std::string {
|
||||
auto [generateesFirst, generateesCount] =
|
||||
op.getGenerateesODSOperandIndexAndLength();
|
||||
@ -3830,6 +3839,60 @@ std::pair<unsigned, unsigned> TileOp::getGenerateesODSOperandIndexAndLength() {
|
||||
return getODSOperandIndexAndLength(odsIndex_generatees);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FuseOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Print the CLI operands of an omp.fuse operation as
/// `(generatees) <- (applyees)`. Each of the two parts is omitted entirely
/// when its operand list is empty.
static void printLoopTransformClis(OpAsmPrinter &p, FuseOp op,
                                   OperandRange generatees,
                                   OperandRange applyees) {
  const bool hasGeneratees = !generatees.empty();
  if (hasGeneratees) {
    p << '(';
    p << llvm::interleaved(generatees);
    p << ')';
  }

  const bool hasApplyees = !applyees.empty();
  if (hasApplyees) {
    p << " <- (";
    p << llvm::interleaved(applyees);
    p << ')';
  }
}
|
||||
|
||||
/// Verify an omp.fuse operation:
///  - it applies to at least two loops;
///  - the `looprange` attributes `first` and `count` are either both present
///    or both absent, are positive, and the 1-based range
///    [first, first + count - 1] lies within the applyees;
///  - the generatees, if any are given, match the number of loops that exist
///    after the fusion (one fused loop plus every untouched applyee);
///  - every applyee is generated by an omp.canonical_loop.
LogicalResult FuseOp::verify() {
  const int64_t numApplyees = static_cast<int64_t>(getApplyees().size());
  const int64_t numGeneratees = static_cast<int64_t>(getGeneratees().size());

  if (numApplyees < 2)
    return emitOpError() << "must apply to at least two loops";

  // A half-specified looprange cannot be printed by the custom assembly format
  // and leaves the fused range undefined.
  if (getFirst().has_value() != getCount().has_value())
    return emitOpError()
           << "looprange requires both the first and the count attributes";

  if (getFirst().has_value()) {
    int64_t first = getFirst().value();
    int64_t count = getCount().value();

    // `first` is a 1-based loop position and an empty range would leave
    // nothing to fuse.
    if (first < 1 || count < 1)
      return emitOpError()
             << "looprange first and count attributes must be positive";

    // Equivalent to `first + count - 1 > numApplyees`, written so that huge
    // attribute values cannot overflow or wrap around.
    if (count > numApplyees || first - 1 > numApplyees - count)
      return emitOpError() << "the numbers of applyees must be at least first "
                              "minus one plus count attributes";

    // `count` applyees collapse into a single generated loop.
    if (numGeneratees != 0 && numGeneratees != numApplyees + 1 - count)
      return emitOpError() << "the number of generatees must be the number of "
                              "aplyees plus one minus count";
  } else if (numGeneratees > 1) {
    return emitOpError()
           << "in a complete fuse the number of generatees must be exactly 1";
  }

  for (auto &&applyee : getApplyees()) {
    auto [create, gen, cons] = decodeCli(applyee);

    if (!gen)
      return emitOpError() << "applyee CLI has no generator";
    auto loop = dyn_cast_or_null<CanonicalLoopOp>(gen->getOwner());
    if (!loop)
      return emitOpError()
             << "currently only supports omp.canonical_loop as applyee";
  }
  return success();
}
|
||||
std::pair<unsigned, unsigned> FuseOp::getApplyeesODSOperandIndexAndLength() {
|
||||
return getODSOperandIndexAndLength(odsIndex_applyees);
|
||||
}
|
||||
|
||||
std::pair<unsigned, unsigned> FuseOp::getGenerateesODSOperandIndexAndLength() {
|
||||
return getODSOperandIndexAndLength(odsIndex_generatees);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Critical construct (2.17.1)
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -3794,6 +3794,51 @@ static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder,
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Apply a `#pragma omp fuse` / `!$omp fuse` transformation using the
|
||||
/// OpenMPIRBuilder.
|
||||
static LogicalResult applyFuse(omp::FuseOp op, llvm::IRBuilderBase &builder,
|
||||
LLVM::ModuleTranslation &moduleTranslation) {
|
||||
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
||||
llvm::OpenMPIRBuilder::LocationDescription loc(builder);
|
||||
|
||||
// Select what CLIs are going to be fused
|
||||
SmallVector<llvm::CanonicalLoopInfo *> beforeFuse, toFuse, afterFuse;
|
||||
for (size_t i = 0; i < op.getApplyees().size(); i++) {
|
||||
Value applyee = op.getApplyees()[i];
|
||||
llvm::CanonicalLoopInfo *consBuilderCLI =
|
||||
moduleTranslation.lookupOMPLoop(applyee);
|
||||
assert(applyee && "Canonical loop must already been translated");
|
||||
if (op.getFirst().has_value() && i < op.getFirst().value() - 1)
|
||||
beforeFuse.push_back(consBuilderCLI);
|
||||
else if (op.getCount().has_value() &&
|
||||
i >= op.getFirst().value() + op.getCount().value() - 1)
|
||||
afterFuse.push_back(consBuilderCLI);
|
||||
else
|
||||
toFuse.push_back(consBuilderCLI);
|
||||
}
|
||||
assert(
|
||||
(op.getGeneratees().empty() ||
|
||||
beforeFuse.size() + afterFuse.size() + 1 == op.getGeneratees().size()) &&
|
||||
"Wrong number of generatees");
|
||||
|
||||
// do the fuse
|
||||
auto generatedLoop = ompBuilder->fuseLoops(loc.DL, toFuse);
|
||||
if (!op.getGeneratees().empty()) {
|
||||
size_t i = 0;
|
||||
for (; i < beforeFuse.size(); i++)
|
||||
moduleTranslation.mapOmpLoop(op.getGeneratees()[i], beforeFuse[i]);
|
||||
moduleTranslation.mapOmpLoop(op.getGeneratees()[i++], generatedLoop);
|
||||
for (; i < afterFuse.size(); i++)
|
||||
moduleTranslation.mapOmpLoop(op.getGeneratees()[i], afterFuse[i]);
|
||||
}
|
||||
|
||||
// CLIs can only be consumed once
|
||||
for (Value applyee : op.getApplyees())
|
||||
moduleTranslation.invalidateOmpLoop(applyee);
|
||||
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
|
||||
static llvm::AtomicOrdering
|
||||
convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
|
||||
@ -7271,6 +7316,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
|
||||
.Case([&](omp::TileOp op) {
|
||||
return applyTile(op, builder, moduleTranslation);
|
||||
})
|
||||
.Case([&](omp::FuseOp op) {
|
||||
return applyFuse(op, builder, moduleTranslation);
|
||||
})
|
||||
.Case([&](omp::TargetAllocMemOp) {
|
||||
return convertTargetAllocMemOp(*op, builder, moduleTranslation);
|
||||
})
|
||||
|
||||
114
mlir/test/Dialect/OpenMP/cli-fuse.mlir
Normal file
114
mlir/test/Dialect/OpenMP/cli-fuse.mlir
Normal file
@ -0,0 +1,114 @@
|
||||
// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
|
||||
// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
|
||||
|
||||
|
||||
// Raw syntax check (MLIR output is always pretty-printed)
|
||||
// CHECK-LABEL: @omp_fuse_raw(
|
||||
// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32) {
|
||||
func.func @omp_fuse_raw(%tc1 : i32, %tc2 : i32) -> () {
|
||||
// CHECK-NEXT: %canonloop_s0 = omp.new_cli
|
||||
%canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli)
|
||||
// CHECK-NEXT: %canonloop_s1 = omp.new_cli
|
||||
%canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli)
|
||||
// CHECK-NEXT: %fused = omp.new_cli
|
||||
%fused = "omp.new_cli" () : () -> (!omp.cli)
|
||||
// CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) {
|
||||
"omp.canonical_loop" (%tc1, %canonloop_s0) ({
|
||||
^bb0(%iv_s0: i32):
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}) : (i32, !omp.cli) -> ()
|
||||
// CHECK: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) {
|
||||
"omp.canonical_loop" (%tc2, %canonloop_s1) ({
|
||||
^bb0(%iv_s1: i32):
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}) : (i32, !omp.cli) -> ()
|
||||
// CHECK: omp.fuse (%fused) <- (%canonloop_s0, %canonloop_s1)
|
||||
"omp.fuse"(%fused, %canonloop_s0, %canonloop_s1) <{operandSegmentSizes = array<i32: 1, 2>}> : (!omp.cli, !omp.cli, !omp.cli) -> ()
|
||||
return
|
||||
}
|
||||
|
||||
// Pretty syntax check
|
||||
// CHECK-LABEL: @omp_fuse_pretty(
|
||||
// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32) {
|
||||
func.func @omp_fuse_pretty(%tc1 : i32, %tc2 : i32) -> () {
|
||||
// CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
|
||||
%canonloop_s0 = omp.new_cli
|
||||
// CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
|
||||
%canonloop_s1 = omp.new_cli
|
||||
// CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
|
||||
%fused = omp.new_cli
|
||||
// CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) {
|
||||
omp.canonical_loop (%canonloop_s0) %iv_s0 : i32 in range(%tc1) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
// CHECK: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) {
|
||||
omp.canonical_loop (%canonloop_s1) %iv_s1 : i32 in range(%tc2) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
// CHECK: omp.fuse (%fused) <- (%canonloop_s0, %canonloop_s1)
|
||||
omp.fuse(%fused) <- (%canonloop_s0, %canonloop_s1)
|
||||
return
|
||||
}
|
||||
|
||||
// Specifying the generatees for omp.fuse is optional
|
||||
// CHECK-LABEL: @omp_fuse_optionalgen_pretty(
|
||||
// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32) {
|
||||
func.func @omp_fuse_optionalgen_pretty(%tc1 : i32, %tc2 : i32) -> () {
|
||||
// CHECK-NEXT: %canonloop_s0 = omp.new_cli
|
||||
%canonloop_s0 = omp.new_cli
|
||||
// CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) {
|
||||
omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc1) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
// CHECK: %canonloop_s1 = omp.new_cli
|
||||
%canonloop_s1 = omp.new_cli
|
||||
// CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) {
|
||||
omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc2) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
// CHECK: omp.fuse <- (%canonloop_s0, %canonloop_s1)
|
||||
omp.fuse <- (%canonloop_s0, %canonloop_s1)
|
||||
return
|
||||
}
|
||||
|
||||
// Fuse with looprange attributes
|
||||
// CHECK-LABEL: @omp_fuse_looprange(
|
||||
// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32, %[[tc3:.+]]: i32) {
|
||||
func.func @omp_fuse_looprange(%tc1 : i32, %tc2 : i32, %tc3 : i32) -> () {
|
||||
// CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
|
||||
%canonloop_s0 = omp.new_cli
|
||||
// CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
|
||||
%canonloop_s1 = omp.new_cli
|
||||
// CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
|
||||
%canonloop_s2 = omp.new_cli
|
||||
// CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
|
||||
%canonloop_fuse = omp.new_cli
|
||||
// CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
|
||||
%fused = omp.new_cli
|
||||
// CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) {
|
||||
omp.canonical_loop (%canonloop_s0) %iv_s0 : i32 in range(%tc1) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
// CHECK: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) {
|
||||
omp.canonical_loop (%canonloop_s1) %iv_s1 : i32 in range(%tc2) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
// CHECK: omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%[[tc3]]) {
|
||||
omp.canonical_loop (%canonloop_s2) %iv_s2 : i32 in range(%tc3) {
|
||||
// CHECK: omp.terminator
|
||||
omp.terminator
|
||||
}
|
||||
// CHECK: omp.fuse (%canonloop_fuse, %fused) <- (%canonloop_s0,
|
||||
// %canonloop_s1, %canonloop_s2) looprange(first = 1, count = 2)
|
||||
omp.fuse(%fused, %canonloop_fuse) <- (%canonloop_s0, %canonloop_s1, %canonloop_s2) looprange(first = 1, count = 2)
|
||||
return
|
||||
}
|
||||
|
||||
102
mlir/test/Dialect/OpenMP/invalid-fuse.mlir
Normal file
102
mlir/test/Dialect/OpenMP/invalid-fuse.mlir
Normal file
@ -0,0 +1,102 @@
|
||||
// RUN: mlir-opt -split-input-file -verify-diagnostics %s
|
||||
|
||||
|
||||
func.func @no_loops(%tc1 : i32, %tc2 : i32) {
|
||||
// expected-error@+1 {{'omp.fuse' op must apply to at least two loops}}
|
||||
omp.fuse <-()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @one_loop(%tc1 : i32, %tc2 : i32) {
|
||||
%canonloop = omp.new_cli
|
||||
omp.canonical_loop(%canonloop) %iv : i32 in range(%tc1) {
|
||||
omp.terminator
|
||||
}
|
||||
// expected-error@+1 {{'omp.fuse' op must apply to at least two loops}}
|
||||
omp.fuse <-(%canonloop)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @missing_generator(%tc1 : i32, %tc2 : i32) {
|
||||
// expected-error@+1 {{'omp.new_cli' op CLI has no generator}}
|
||||
%canonloop = omp.new_cli
|
||||
|
||||
// expected-note@+1 {{see consumer here: "omp.fuse"(%0) <{operandSegmentSizes = array<i32: 0, 1>}> : (!omp.cli) -> ()}}
|
||||
omp.fuse <-(%canonloop)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @wrong_generatees1(%tc1 : i32, %tc2 : i32) {
|
||||
%canonloop1 = omp.new_cli
|
||||
%canonloop2 = omp.new_cli
|
||||
omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc1) {
|
||||
omp.terminator
|
||||
}
|
||||
omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc2) {
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
%fused1 = omp.new_cli
|
||||
%fused2 = omp.new_cli
|
||||
// expected-error@+1 {{'omp.fuse' op in a complete fuse the number of generatees must be exactly 1}}
|
||||
omp.fuse (%fused1, %fused2) <-(%canonloop1, %canonloop2)
|
||||
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @wrong_generatees2(%tc1 : i32, %tc2 : i32, %tc3 : i32) {
|
||||
%canonloop1 = omp.new_cli
|
||||
%canonloop2 = omp.new_cli
|
||||
%canonloop3 = omp.new_cli
|
||||
omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc1) {
|
||||
omp.terminator
|
||||
}
|
||||
omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc2) {
|
||||
omp.terminator
|
||||
}
|
||||
omp.canonical_loop(%canonloop3) %iv : i32 in range(%tc3) {
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
%fused = omp.new_cli
|
||||
// expected-error@+1 {{'omp.fuse' op the number of generatees must be the number of aplyees plus one minus count}}
|
||||
omp.fuse (%fused) <-(%canonloop1, %canonloop2, %canonloop3) looprange(first = 1, count = 2)
|
||||
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @wrong_applyees(%tc1 : i32, %tc2 : i32, %tc3 : i32) {
|
||||
%canonloop1 = omp.new_cli
|
||||
%canonloop2 = omp.new_cli
|
||||
%canonloop3 = omp.new_cli
|
||||
omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc1) {
|
||||
omp.terminator
|
||||
}
|
||||
omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc2) {
|
||||
omp.terminator
|
||||
}
|
||||
omp.canonical_loop(%canonloop3) %iv : i32 in range(%tc3) {
|
||||
omp.terminator
|
||||
}
|
||||
|
||||
%fused = omp.new_cli
|
||||
%canonloop_fuse = omp.new_cli
|
||||
// expected-error@+1 {{'omp.fuse' op the numbers of applyees must be at least first minus one plus count attributes}}
|
||||
omp.fuse (%fused, %canonloop_fuse) <-(%canonloop1, %canonloop2, %canonloop3) looprange(first = 1, count = 5)
|
||||
|
||||
llvm.return
|
||||
}
|
||||
|
||||
100
mlir/test/Target/LLVMIR/openmp-cli-fuse01.mlir
Normal file
100
mlir/test/Target/LLVMIR/openmp-cli-fuse01.mlir
Normal file
@ -0,0 +1,100 @@
|
||||
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --enable-var-scope
|
||||
|
||||
|
||||
llvm.func @fuse_trivial_loops(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32) -> () {
|
||||
%literal_cli1 = omp.new_cli
|
||||
omp.canonical_loop(%literal_cli1) %iv1 : i32 in range(%tc1) {
|
||||
%ptr = llvm.getelementptr inbounds %baseptr[%iv1] : (!llvm.ptr, i32) -> !llvm.ptr, f32
|
||||
%val = llvm.mlir.constant(42.0 : f32) : f32
|
||||
llvm.store %val, %ptr : f32, !llvm.ptr
|
||||
omp.terminator
|
||||
}
|
||||
%literal_cli2 = omp.new_cli
|
||||
omp.canonical_loop(%literal_cli2) %iv2 : i32 in range(%tc2) {
|
||||
%ptr = llvm.getelementptr inbounds %baseptr[%iv2] : (!llvm.ptr, i32) -> !llvm.ptr, f32
|
||||
%val = llvm.mlir.constant(21.0 : f32) : f32
|
||||
llvm.store %val, %ptr : f32, !llvm.ptr
|
||||
omp.terminator
|
||||
}
|
||||
omp.fuse <- (%literal_cli1, %literal_cli2)
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define void @fuse_trivial_loops(
|
||||
// CHECK-SAME: ptr %[[VAL_11:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_16:.+]]) {
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER1:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER1]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSE_COMP_TC:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSE_COMP_TC]]:
|
||||
// CHECK-NEXT: %[[VAL_15:.+]] = icmp sgt i32 %[[VAL_5:.+]], %[[VAL_16:.+]]
|
||||
// CHECK-NEXT: %[[VAL_17:.+]] = select i1 %[[VAL_15:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_16:.+]]
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_HEADER]]:
|
||||
// CHECK-NEXT: %[[VAL_4:.+]] = phi i32 [ 0, %[[VAL_18:.+]] ], [ %[[VAL_27:.+]], %[[VAL_26:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_COND]]:
|
||||
// CHECK-NEXT: %[[VAL_29:.+]] = icmp ult i32 %[[VAL_4:.+]], %[[VAL_17:.+]]
|
||||
// CHECK-NEXT: br i1 %[[VAL_29:.+]], label %[[OMP_FUSED_BODY:.+]], label %[[OMP_FUSED_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_BODY]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_INNER_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_INNER_COND]]:
|
||||
// CHECK-NEXT: %[[VAL_3:.+]] = icmp slt i32 %[[VAL_4:.+]], %[[VAL_5:.+]]
|
||||
// CHECK-NEXT: br i1 %[[VAL_3:.+]], label %[[OMP_OMP_LOOP_BODY:.+]], label %[[OMP_FUSED_INNER_COND13:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_BODY]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_REGION:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_REGION]]:
|
||||
// CHECK-NEXT: %[[VAL_10:.+]] = getelementptr inbounds float, ptr %[[VAL_11:.+]], i32 %[[VAL_4:.+]]
|
||||
// CHECK-NEXT: store float 4.200000e+01, ptr %[[VAL_10:.+]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_REGION_CONT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_REGION_CONT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_INNER_COND13:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_INNER_COND13]]:
|
||||
// CHECK-NEXT: %[[VAL_19:.+]] = icmp slt i32 %[[VAL_4:.+]], %[[VAL_16:.+]]
|
||||
// CHECK-NEXT: br i1 %[[VAL_19:.+]], label %[[OMP_OMP_LOOP_BODY4:.+]], label %[[OMP_FUSED_PRE_LATCH:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_BODY4]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_REGION12:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_REGION12]]:
|
||||
// CHECK-NEXT: %[[VAL_23:.+]] = getelementptr inbounds float, ptr %[[VAL_11:.+]], i32 %[[VAL_4:.+]]
|
||||
// CHECK-NEXT: store float 2.100000e+01, ptr %[[VAL_23:.+]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_REGION_CONT11:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_REGION_CONT11]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_PRE_LATCH:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_PRE_LATCH]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_INC:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_INC]]:
|
||||
// CHECK-NEXT: %[[VAL_27:.+]] = add nuw i32 %[[VAL_4:.+]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER7:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER7]]:
|
||||
// CHECK-NEXT: ret void
|
||||
|
||||
140
mlir/test/Target/LLVMIR/openmp-cli-fuse02.mlir
Normal file
140
mlir/test/Target/LLVMIR/openmp-cli-fuse02.mlir
Normal file
@ -0,0 +1,140 @@
|
||||
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --enable-var-scope
|
||||
|
||||
|
||||
llvm.func @fuse_looprange_loops(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32, %tc3: i32) -> () {
|
||||
%literal_cli1 = omp.new_cli
|
||||
omp.canonical_loop(%literal_cli1) %iv1 : i32 in range(%tc1) {
|
||||
%ptr = llvm.getelementptr inbounds %baseptr[%iv1] : (!llvm.ptr, i32) -> !llvm.ptr, f32
|
||||
%val = llvm.mlir.constant(42.0 : f32) : f32
|
||||
llvm.store %val, %ptr : f32, !llvm.ptr
|
||||
omp.terminator
|
||||
}
|
||||
%literal_cli2 = omp.new_cli
|
||||
omp.canonical_loop(%literal_cli2) %iv2 : i32 in range(%tc2) {
|
||||
%ptr = llvm.getelementptr inbounds %baseptr[%iv2] : (!llvm.ptr, i32) -> !llvm.ptr, f32
|
||||
%val = llvm.mlir.constant(21.0 : f32) : f32
|
||||
llvm.store %val, %ptr : f32, !llvm.ptr
|
||||
omp.terminator
|
||||
}
|
||||
%literal_cli3 = omp.new_cli
|
||||
omp.canonical_loop(%literal_cli3) %iv3 : i32 in range(%tc3) {
|
||||
%ptr = llvm.getelementptr inbounds %baseptr[%iv3] : (!llvm.ptr, i32) -> !llvm.ptr, f32
|
||||
%val = llvm.mlir.constant(63.0 : f32) : f32
|
||||
llvm.store %val, %ptr : f32, !llvm.ptr
|
||||
omp.terminator
|
||||
}
|
||||
omp.fuse <- (%literal_cli1, %literal_cli2, %literal_cli3) looprange(first = 1, count = 2)
|
||||
llvm.return
|
||||
}
|
||||
|
||||
|
||||
// CHECK-LABEL: define void @fuse_looprange_loops(
|
||||
// CHECK-SAME: ptr %[[VAL_23:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_6:.+]], i32 %[[VAL_40:.+]]) {
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER1:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER1]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSE_COMP_TC:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSE_COMP_TC]]:
|
||||
// CHECK-NEXT: %[[VAL_4:.+]] = icmp sgt i32 %[[VAL_5:.+]], %[[VAL_6:.+]]
|
||||
// CHECK-NEXT: %[[VAL_7:.+]] = select i1 %[[VAL_4:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_6:.+]]
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_PREHEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_PREHEADER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_HEADER]]:
|
||||
// CHECK-NEXT: %[[VAL_11:.+]] = phi i32 [ 0, %[[VAL_8:.+]] ], [ %[[VAL_12:.+]], %[[VAL_10:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_COND]]:
|
||||
// CHECK-NEXT: %[[VAL_14:.+]] = icmp ult i32 %[[VAL_11:.+]], %[[VAL_7:.+]]
|
||||
// CHECK-NEXT: br i1 %[[VAL_14:.+]], label %[[OMP_FUSED_BODY:.+]], label %[[OMP_FUSED_EXIT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_BODY]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_INNER_COND:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_INNER_COND]]:
|
||||
// CHECK-NEXT: %[[VAL_18:.+]] = icmp slt i32 %[[VAL_11:.+]], %[[VAL_5:.+]]
|
||||
// CHECK-NEXT: br i1 %[[VAL_18:.+]], label %[[OMP_OMP_LOOP_BODY:.+]], label %[[OMP_FUSED_INNER_COND25:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_BODY]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_REGION:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_REGION]]:
|
||||
// CHECK-NEXT: %[[VAL_22:.+]] = getelementptr inbounds float, ptr %[[VAL_23:.+]], i32 %[[VAL_11:.+]]
|
||||
// CHECK-NEXT: store float 4.200000e+01, ptr %[[VAL_22:.+]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_REGION_CONT:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_REGION_CONT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_INNER_COND25:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_INNER_COND25]]:
|
||||
// CHECK-NEXT: %[[VAL_25:.+]] = icmp slt i32 %[[VAL_11:.+]], %[[VAL_6:.+]]
|
||||
// CHECK-NEXT: br i1 %[[VAL_25:.+]], label %[[OMP_OMP_LOOP_BODY4:.+]], label %[[OMP_FUSED_PRE_LATCH:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_BODY4]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_REGION12:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_REGION12]]:
|
||||
// CHECK-NEXT: %[[VAL_29:.+]] = getelementptr inbounds float, ptr %[[VAL_23:.+]], i32 %[[VAL_11:.+]]
|
||||
// CHECK-NEXT: store float 2.100000e+01, ptr %[[VAL_29:.+]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_REGION_CONT11:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_REGION_CONT11]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_PRE_LATCH:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_PRE_LATCH]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_INC:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_INC]]:
|
||||
// CHECK-NEXT: %[[VAL_12:.+]] = add nuw i32 %[[VAL_11:.+]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_HEADER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[OMP_FUSED_AFTER:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_FUSED_AFTER]]:
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER7:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER7]]:
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER13:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER13]]:
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_HEADER14:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_HEADER14]]:
|
||||
// CHECK-NEXT: %[[VAL_36:.+]] = phi i32 [ 0, %[[VAL_33:.+]] ], [ %[[VAL_37:.+]], %[[VAL_35:.+]] ]
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_COND15:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_COND15]]:
|
||||
// CHECK-NEXT: %[[VAL_39:.+]] = icmp ult i32 %[[VAL_36:.+]], %[[VAL_40:.+]]
|
||||
// CHECK-NEXT: br i1 %[[VAL_39:.+]], label %[[OMP_OMP_LOOP_BODY16:.+]], label %[[OMP_OMP_LOOP_EXIT18:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_BODY16]]:
|
||||
// CHECK-NEXT: br label %[[OMP_LOOP_REGION24:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_LOOP_REGION24]]:
|
||||
// CHECK-NEXT: %[[VAL_44:.+]] = getelementptr inbounds float, ptr %[[VAL_23:.+]], i32 %[[VAL_36:.+]]
|
||||
// CHECK-NEXT: store float 6.300000e+01, ptr %[[VAL_44:.+]], align 4
|
||||
// CHECK-NEXT: br label %[[OMP_REGION_CONT23:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_REGION_CONT23]]:
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_INC17:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_INC17]]:
|
||||
// CHECK-NEXT: %[[VAL_37:.+]] = add nuw i32 %[[VAL_36:.+]], 1
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_HEADER14:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_EXIT18]]:
|
||||
// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER19:.+]]
|
||||
// CHECK-EMPTY:
|
||||
// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER19]]:
|
||||
// CHECK-NEXT: ret void
|
||||
|
||||
60
openmp/runtime/test/transform/fuse/do-looprange.f90
Normal file
60
openmp/runtime/test/transform/fuse/do-looprange.f90
Normal file
@ -0,0 +1,60 @@
|
||||
! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe
|
||||
! RUN: %t.exe | FileCheck %s --match-full-lines
|
||||
|
||||
! End-to-end test program for `!$OMP FUSE` with a LOOPRANGE clause.
! It prints one line per loop iteration so that the FileCheck lines after
! the program can pin the exact order in which the iterations execute.
program fuse_full
|
||||
implicit none
|
||||
! One induction variable per loop in the sequence below.
integer i, j, k, u
|
||||
|
||||
! Opening marker; the expected output starts matching at this line.
print *, 'do'
|
||||
|
||||
! Loop sequence under test. With LOOPRANGE(2,2) the expected output shows
! the i loop running on its own first, the j and k loops interleaved
! iteration by iteration, and the u loop running on its own afterwards.
! The j loop has 10 iterations and the k loop has 11 (10 down to 0), so the
! expected output ends the interleaved part with a lone "k=0".
!$OMP FUSE LOOPRANGE(2,2)
|
||||
do i=5, 25, 5
|
||||
print '("i=", I0)', i
|
||||
end do
|
||||
do j=10, 100, 10
|
||||
print '("j=", I0)', j
|
||||
end do
|
||||
do k=10, 0, -1
|
||||
print '("k=", I0)', k
|
||||
end do
|
||||
do u=5, 25, 5
|
||||
print '("u=", I0)', u
|
||||
end do
|
||||
!$OMP END FUSE
|
||||
|
||||
! Closing marker; the expected output requires it right after the last
! loop iteration line.
print *, 'done'
|
||||
end program
|
||||
|
||||
! CHECK: do
|
||||
! CHECK-NEXT: i=5
|
||||
! CHECK-NEXT: i=10
|
||||
! CHECK-NEXT: i=15
|
||||
! CHECK-NEXT: i=20
|
||||
! CHECK-NEXT: i=25
|
||||
! CHECK-NEXT: j=10
|
||||
! CHECK-NEXT: k=10
|
||||
! CHECK-NEXT: j=20
|
||||
! CHECK-NEXT: k=9
|
||||
! CHECK-NEXT: j=30
|
||||
! CHECK-NEXT: k=8
|
||||
! CHECK-NEXT: j=40
|
||||
! CHECK-NEXT: k=7
|
||||
! CHECK-NEXT: j=50
|
||||
! CHECK-NEXT: k=6
|
||||
! CHECK-NEXT: j=60
|
||||
! CHECK-NEXT: k=5
|
||||
! CHECK-NEXT: j=70
|
||||
! CHECK-NEXT: k=4
|
||||
! CHECK-NEXT: j=80
|
||||
! CHECK-NEXT: k=3
|
||||
! CHECK-NEXT: j=90
|
||||
! CHECK-NEXT: k=2
|
||||
! CHECK-NEXT: j=100
|
||||
! CHECK-NEXT: k=1
|
||||
! CHECK-NEXT: k=0
|
||||
! CHECK-NEXT: u=5
|
||||
! CHECK-NEXT: u=10
|
||||
! CHECK-NEXT: u=15
|
||||
! CHECK-NEXT: u=20
|
||||
! CHECK-NEXT: u=25
|
||||
! CHECK-NEXT: done
|
||||
52
openmp/runtime/test/transform/fuse/do.f90
Normal file
52
openmp/runtime/test/transform/fuse/do.f90
Normal file
@ -0,0 +1,52 @@
|
||||
! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe
|
||||
! RUN: %t.exe | FileCheck %s --match-full-lines
|
||||
|
||||
! End-to-end test program for `!$OMP FUSE` without any clause.
! It prints one line per loop iteration so that the FileCheck lines after
! the program can pin the exact order in which the iterations execute.
program fuse_full
|
||||
implicit none
|
||||
! One induction variable per loop in the sequence below.
integer i, j, k
|
||||
|
||||
! Opening marker; the expected output starts matching at this line.
print *, 'do'
|
||||
|
||||
! Loop sequence under test. The expected output shows all three loops
! interleaved iteration by iteration (i, j, k, i, j, k, ...). The loops have
! different trip counts: i has 5 iterations, j has 10 and k has 11
! (10 down to 0). In the expected output, once a shorter loop is exhausted
! the remaining loops keep alternating, ending with a lone "k=0".
!$OMP FUSE
|
||||
do i=5, 25, 5
|
||||
print '("i=", I0)', i
|
||||
end do
|
||||
do j=10, 100, 10
|
||||
print '("j=", I0)', j
|
||||
end do
|
||||
do k=10, 0, -1
|
||||
print '("k=", I0)', k
|
||||
end do
|
||||
!$OMP END FUSE
|
||||
|
||||
! Closing marker; the expected output requires it right after the last
! loop iteration line.
print *, 'done'
|
||||
end program
|
||||
|
||||
! CHECK: do
|
||||
! CHECK-NEXT: i=5
|
||||
! CHECK-NEXT: j=10
|
||||
! CHECK-NEXT: k=10
|
||||
! CHECK-NEXT: i=10
|
||||
! CHECK-NEXT: j=20
|
||||
! CHECK-NEXT: k=9
|
||||
! CHECK-NEXT: i=15
|
||||
! CHECK-NEXT: j=30
|
||||
! CHECK-NEXT: k=8
|
||||
! CHECK-NEXT: i=20
|
||||
! CHECK-NEXT: j=40
|
||||
! CHECK-NEXT: k=7
|
||||
! CHECK-NEXT: i=25
|
||||
! CHECK-NEXT: j=50
|
||||
! CHECK-NEXT: k=6
|
||||
! CHECK-NEXT: j=60
|
||||
! CHECK-NEXT: k=5
|
||||
! CHECK-NEXT: j=70
|
||||
! CHECK-NEXT: k=4
|
||||
! CHECK-NEXT: j=80
|
||||
! CHECK-NEXT: k=3
|
||||
! CHECK-NEXT: j=90
|
||||
! CHECK-NEXT: k=2
|
||||
! CHECK-NEXT: j=100
|
||||
! CHECK-NEXT: k=1
|
||||
! CHECK-NEXT: k=0
|
||||
! CHECK-NEXT: done
|
||||
Loading…
x
Reference in New Issue
Block a user