
Fixes #108136. In #108136 (the new testcase), flang was missing the length parameter required for the variable-length string when boxing the global variable. The code that initializes global variables for OpenMP did not support types with length parameters. Instead of duplicating this initialization logic in OpenMP, I decided to use the exact same initialization as is used in the base language, because that path is already well tested and will be updated for any new types. The difference for OpenMP is that the global variables will be zero-initialized instead of left undefined. Previously, `Fortran::lower::createGlobalInitialization` was used to share a smaller amount of the logic with the base language lowering. I think this bug has demonstrated that the helper was too low-level to be helpful, and since it was only used in OpenMP, I have made it static inside of ConvertVariable.cpp.
4542 lines
199 KiB
C++
4542 lines
199 KiB
C++
//===-- OpenMP.cpp -- Open MP directive lowering --------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "flang/Lower/OpenMP.h"
|
|
|
|
#include "ClauseProcessor.h"
|
|
#include "Clauses.h"
|
|
#include "DataSharingProcessor.h"
|
|
#include "Decomposer.h"
|
|
#include "ReductionProcessor.h"
|
|
#include "Utils.h"
|
|
#include "flang/Common/idioms.h"
|
|
#include "flang/Lower/Bridge.h"
|
|
#include "flang/Lower/ConvertExpr.h"
|
|
#include "flang/Lower/ConvertVariable.h"
|
|
#include "flang/Lower/DirectivesCommon.h"
|
|
#include "flang/Lower/StatementContext.h"
|
|
#include "flang/Lower/SymbolMap.h"
|
|
#include "flang/Optimizer/Builder/BoxValue.h"
|
|
#include "flang/Optimizer/Builder/FIRBuilder.h"
|
|
#include "flang/Optimizer/Builder/Todo.h"
|
|
#include "flang/Optimizer/Dialect/FIRType.h"
|
|
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
|
#include "flang/Parser/characters.h"
|
|
#include "flang/Parser/parse-tree.h"
|
|
#include "flang/Semantics/openmp-directive-sets.h"
|
|
#include "flang/Semantics/tools.h"
|
|
#include "flang/Support/OpenMP-utils.h"
|
|
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
|
|
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
|
|
#include "mlir/Transforms/RegionUtils.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/Frontend/OpenMP/OMPConstants.h"
|
|
|
|
using namespace Fortran::lower::omp;
|
|
using namespace Fortran::common::openmp;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation helper functions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Forward declaration: dispatches lowering of the construct at \p item in the
/// construct \p queue to the matching codegen function (defined later in this
/// file).
static void genOMPDispatch(lower::AbstractConverter &converter,
                           lower::SymMap &symTable,
                           semantics::SemanticsContext &semaCtx,
                           lower::pft::Evaluation &eval, mlir::Location loc,
                           const ConstructQueue &queue,
                           ConstructQueue::const_iterator item);
|
|
|
|
/// Forward declaration: collects clause values that must be evaluated on the
/// host for a 'target' construct (defined later in this file; it is declared
/// here so HostEvalInfo can befriend it).
static void processHostEvalClauses(lower::AbstractConverter &converter,
                                   semantics::SemanticsContext &semaCtx,
                                   lower::StatementContext &stmtCtx,
                                   lower::pft::Evaluation &eval,
                                   mlir::Location loc);
|
|
|
|
namespace {
/// Structure holding information that is needed to pass host-evaluated
/// information to later lowering stages.
class HostEvalInfo {
public:
  // Allow this function access to private members in order to initialize them.
  friend void ::processHostEvalClauses(lower::AbstractConverter &,
                                       semantics::SemanticsContext &,
                                       lower::StatementContext &,
                                       lower::pft::Evaluation &,
                                       mlir::Location);

  /// Fill \c vars with values stored in \c ops.
  ///
  /// The order in which values are stored matches the one expected by \see
  /// bindOperands().
  void collectValues(llvm::SmallVectorImpl<mlir::Value> &vars) const {
    // NOTE: the append/push_back sequence below defines the operand layout;
    // bindOperands() consumes block arguments in exactly this order, so the
    // two functions must be kept in sync.
    vars.append(ops.loopLowerBounds);
    vars.append(ops.loopUpperBounds);
    vars.append(ops.loopSteps);

    if (ops.numTeamsLower)
      vars.push_back(ops.numTeamsLower);

    if (ops.numTeamsUpper)
      vars.push_back(ops.numTeamsUpper);

    if (ops.numThreads)
      vars.push_back(ops.numThreads);

    if (ops.threadLimit)
      vars.push_back(ops.threadLimit);
  }

  /// Update \c ops, replacing all values with the corresponding block argument
  /// in \c args.
  ///
  /// The order in which values are stored in \c args is the same as the one
  /// used by \see collectValues().
  void bindOperands(llvm::ArrayRef<mlir::BlockArgument> args) {
    // The expected argument count is the number of values collectValues()
    // would have produced: all loop bounds/steps plus one slot per present
    // optional operand.
    assert(args.size() ==
               ops.loopLowerBounds.size() + ops.loopUpperBounds.size() +
                   ops.loopSteps.size() + (ops.numTeamsLower ? 1 : 0) +
                   (ops.numTeamsUpper ? 1 : 0) + (ops.numThreads ? 1 : 0) +
                   (ops.threadLimit ? 1 : 0) &&
           "invalid block argument list");
    int argIndex = 0;
    for (size_t i = 0; i < ops.loopLowerBounds.size(); ++i)
      ops.loopLowerBounds[i] = args[argIndex++];

    for (size_t i = 0; i < ops.loopUpperBounds.size(); ++i)
      ops.loopUpperBounds[i] = args[argIndex++];

    for (size_t i = 0; i < ops.loopSteps.size(); ++i)
      ops.loopSteps[i] = args[argIndex++];

    if (ops.numTeamsLower)
      ops.numTeamsLower = args[argIndex++];

    if (ops.numTeamsUpper)
      ops.numTeamsUpper = args[argIndex++];

    if (ops.numThreads)
      ops.numThreads = args[argIndex++];

    if (ops.threadLimit)
      ops.threadLimit = args[argIndex++];
  }

  /// Update \p clauseOps and \p ivOut with the corresponding host-evaluated
  /// values and Fortran symbols, respectively, if they have already been
  /// initialized but not yet applied.
  ///
  /// \returns whether an update was performed. If not, these clauses were not
  /// evaluated in the host device.
  bool apply(mlir::omp::LoopNestOperands &clauseOps,
             llvm::SmallVectorImpl<const semantics::Symbol *> &ivOut) {
    // The flag is set on every call, so only the first applicable loop nest
    // can ever consume these values.
    if (iv.empty() || loopNestApplied) {
      loopNestApplied = true;
      return false;
    }

    loopNestApplied = true;
    clauseOps.loopLowerBounds = ops.loopLowerBounds;
    clauseOps.loopUpperBounds = ops.loopUpperBounds;
    clauseOps.loopSteps = ops.loopSteps;
    ivOut.append(iv);
    return true;
  }

  /// Update \p clauseOps with the corresponding host-evaluated values if they
  /// have already been initialized but not yet applied.
  ///
  /// \returns whether an update was performed. If not, these clauses were not
  /// evaluated in the host device.
  bool apply(mlir::omp::ParallelOperands &clauseOps) {
    // As with the loop-nest overload, this is one-shot: only the first caller
    // can consume the host-evaluated num_threads value.
    if (!ops.numThreads || parallelApplied) {
      parallelApplied = true;
      return false;
    }

    parallelApplied = true;
    clauseOps.numThreads = ops.numThreads;
    return true;
  }

  /// Update \p clauseOps with the corresponding host-evaluated values if they
  /// have already been initialized.
  ///
  /// \returns whether an update was performed. If not, these clauses were not
  /// evaluated in the host device.
  bool apply(mlir::omp::TeamsOperands &clauseOps) {
    // Unlike the other overloads, this one is not one-shot; it copies the
    // teams-related operands whenever any of them is present.
    if (!ops.numTeamsLower && !ops.numTeamsUpper && !ops.threadLimit)
      return false;

    clauseOps.numTeamsLower = ops.numTeamsLower;
    clauseOps.numTeamsUpper = ops.numTeamsUpper;
    clauseOps.threadLimit = ops.threadLimit;
    return true;
  }

private:
  // Host-evaluated operand values, filled in by processHostEvalClauses().
  mlir::omp::HostEvaluatedOperands ops;
  // Induction variable symbols matching the collected loop bounds/steps.
  llvm::SmallVector<const semantics::Symbol *> iv;
  // One-shot markers for the loop-nest and parallel apply() overloads.
  bool loopNestApplied = false, parallelApplied = false;
};
} // namespace
|
|
|
|
/// Stack of \see HostEvalInfo to represent the current nest of \c omp.target
/// operations being created.
///
/// The current implementation prevents nested 'target' regions from breaking
/// the handling of the outer region by keeping a stack of information
/// structures, but it will probably still require some further work to support
/// reverse offloading.
///
/// processHostEvalClauses() reads and fills the entry at the back of this
/// stack.
static llvm::SmallVector<HostEvalInfo, 0> hostEvalInfo;
|
|
|
|
/// Bind symbols to their corresponding entry block arguments.
|
|
///
|
|
/// The binding will be performed inside of the current block, which does not
|
|
/// necessarily have to be part of the operation for which the binding is done.
|
|
/// However, block arguments must be accessible. This enables controlling the
|
|
/// insertion point of any new MLIR operations related to the binding of
|
|
/// arguments of a loop wrapper operation.
|
|
///
|
|
/// \param [in] converter - PFT to MLIR conversion interface.
|
|
/// \param [in] op - owner operation of the block arguments to bind.
|
|
/// \param [in] args - entry block arguments information for the given
|
|
/// operation.
|
|
static void bindEntryBlockArgs(lower::AbstractConverter &converter,
                               mlir::omp::BlockArgOpenMPOpInterface op,
                               const EntryBlockArgs &args) {
  assert(op != nullptr && "invalid block argument-defining operation");
  assert(args.isValid() && "invalid args");
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

  // Bind one map-like symbol to its entry block argument, rebuilding the
  // symbol's extended value around the argument so bounds/length information
  // is preserved inside the region.
  auto bindSingleMapLike = [&converter,
                            &firOpBuilder](const semantics::Symbol &sym,
                                           const mlir::BlockArgument &arg) {
    // Clones the `bounds` placing them inside the entry block and returns
    // them.
    auto cloneBound = [&](mlir::Value bound) {
      // Only side-effect-free defining ops can safely be duplicated into the
      // entry block; anything else is currently unsupported.
      if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
        mlir::Operation *clonedOp = firOpBuilder.clone(*bound.getDefiningOp());
        return clonedOp->getResult(0);
      }
      TODO(converter.getCurrentLocation(),
           "target map-like clause operand unsupported bound type");
    };

    auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) {
      llvm::SmallVector<mlir::Value> clonedBounds;
      llvm::transform(bounds, std::back_inserter(clonedBounds),
                      [&](mlir::Value bound) { return cloneBound(bound); });
      return clonedBounds;
    };

    fir::ExtendedValue extVal = converter.getSymbolExtendedValue(sym);
    auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType());
    if (refType && fir::isa_builtin_cptr_type(refType.getElementType())) {
      // C_PTR values are bound directly, with no extended-value wrapper.
      converter.bindSymbol(sym, arg);
    } else {
      // Rebuild the same kind of extended value the symbol had outside the
      // region, substituting the block argument as the base address.
      extVal.match(
          [&](const fir::BoxValue &v) {
            converter.bindSymbol(sym,
                                 fir::BoxValue(arg, cloneBounds(v.getLBounds()),
                                               v.getExplicitParameters(),
                                               v.getExplicitExtents()));
          },
          [&](const fir::MutableBoxValue &v) {
            converter.bindSymbol(
                sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()),
                                          v.getMutableProperties()));
          },
          [&](const fir::ArrayBoxValue &v) {
            converter.bindSymbol(
                sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()),
                                        cloneBounds(v.getLBounds()),
                                        v.getSourceBox()));
          },
          [&](const fir::CharArrayBoxValue &v) {
            converter.bindSymbol(
                sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()),
                                            cloneBounds(v.getExtents()),
                                            cloneBounds(v.getLBounds())));
          },
          [&](const fir::CharBoxValue &v) {
            converter.bindSymbol(
                sym, fir::CharBoxValue(arg, cloneBound(v.getLen())));
          },
          [&](const fir::UnboxedValue &v) { converter.bindSymbol(sym, arg); },
          [&](const auto &) {
            TODO(converter.getCurrentLocation(),
                 "target map clause operand unsupported type");
          });
    }
  };

  auto bindMapLike =
      [&bindSingleMapLike](llvm::ArrayRef<const semantics::Symbol *> syms,
                           llvm::ArrayRef<mlir::BlockArgument> args) {
        // Structure component symbols don't have bindings, and can only be
        // explicitly mapped individually. If a member is captured implicitly
        // we map the entirety of the derived type when we find its symbol.
        llvm::SmallVector<const semantics::Symbol *> processedSyms;
        llvm::copy_if(syms, std::back_inserter(processedSyms),
                      [](auto *sym) { return !sym->owner().IsDerivedType(); });

        for (auto [sym, arg] : llvm::zip_equal(processedSyms, args))
          bindSingleMapLike(*sym, arg);
      };

  auto bindPrivateLike = [&converter, &firOpBuilder](
                             llvm::ArrayRef<const semantics::Symbol *> syms,
                             llvm::ArrayRef<mlir::Value> vars,
                             llvm::ArrayRef<mlir::BlockArgument> args) {
    // A COMMON block symbol is expanded into its member objects so each one
    // can be bound to its own block argument.
    llvm::SmallVector<const semantics::Symbol *> processedSyms;
    for (auto *sym : syms) {
      if (const auto *commonDet =
              sym->detailsIf<semantics::CommonBlockDetails>()) {
        llvm::transform(commonDet->objects(), std::back_inserter(processedSyms),
                        [&](const auto &mem) { return &*mem; });
      } else {
        processedSyms.push_back(sym);
      }
    }

    for (auto [sym, var, arg] : llvm::zip_equal(processedSyms, vars, args))
      converter.bindSymbol(
          *sym,
          hlfir::translateToExtendedValue(
              var.getLoc(), firOpBuilder, hlfir::Entity{arg},
              /*contiguousHint=*/
              evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext()))
              .first);
  };

  // Process in clause name alphabetical order to match block arguments order.
  // Do not bind host_eval variables because they cannot be used inside of the
  // corresponding region, except for very specific cases handled separately.
  bindMapLike(args.hasDeviceAddr.syms, op.getHasDeviceAddrBlockArgs());
  bindPrivateLike(args.inReduction.syms, args.inReduction.vars,
                  op.getInReductionBlockArgs());
  bindMapLike(args.map.syms, op.getMapBlockArgs());
  bindPrivateLike(args.priv.syms, args.priv.vars, op.getPrivateBlockArgs());
  bindPrivateLike(args.reduction.syms, args.reduction.vars,
                  op.getReductionBlockArgs());
  bindPrivateLike(args.taskReduction.syms, args.taskReduction.vars,
                  op.getTaskReductionBlockArgs());
  bindMapLike(args.useDeviceAddr.syms, op.getUseDeviceAddrBlockArgs());
  bindMapLike(args.useDevicePtr.syms, op.getUseDevicePtrBlockArgs());
}
|
|
|
|
/// Get the list of base values that the specified map-like variables point to.
|
|
///
|
|
/// This function must be kept in sync with changes to the `createMapInfoOp`
|
|
/// utility function, since it must take into account the potential introduction
|
|
/// of levels of indirection (i.e. intermediate ops).
|
|
///
|
|
/// \param [in] vars - list of values passed to map-like clauses, returned
|
|
/// by an `omp.map.info` operation.
|
|
/// \param [out] baseOps - populated with the `var_ptr` values of the
|
|
/// corresponding defining operations.
|
|
static void
|
|
extractMappedBaseValues(llvm::ArrayRef<mlir::Value> vars,
|
|
llvm::SmallVectorImpl<mlir::Value> &baseOps) {
|
|
llvm::transform(vars, std::back_inserter(baseOps), [](mlir::Value map) {
|
|
auto mapInfo = map.getDefiningOp<mlir::omp::MapInfoOp>();
|
|
assert(mapInfo && "expected all map vars to be defined by omp.map.info");
|
|
|
|
mlir::Value varPtr = mapInfo.getVarPtr();
|
|
if (auto boxAddr = varPtr.getDefiningOp<fir::BoxAddrOp>())
|
|
return boxAddr.getVal();
|
|
|
|
return varPtr;
|
|
});
|
|
}
|
|
|
|
/// Get the directive enumeration value corresponding to the given OpenMP
|
|
/// construct PFT node.
|
|
llvm::omp::Directive
extractOmpDirective(const parser::OpenMPConstruct &ompConstruct) {
  // Most constructs map to a fixed directive id; block, loop and sections
  // constructs carry their directive inside their begin-directive node.
  return common::visit(
      common::visitors{
          [](const parser::OpenMPAllocatorsConstruct &c) {
            return llvm::omp::OMPD_allocators;
          },
          [](const parser::OpenMPAssumeConstruct &c) {
            return llvm::omp::OMPD_assume;
          },
          [](const parser::OpenMPAtomicConstruct &c) {
            return llvm::omp::OMPD_atomic;
          },
          [](const parser::OpenMPBlockConstruct &c) {
            return std::get<parser::OmpBlockDirective>(
                       std::get<parser::OmpBeginBlockDirective>(c.t).t)
                .v;
          },
          [](const parser::OpenMPCriticalConstruct &c) {
            return llvm::omp::OMPD_critical;
          },
          [](const parser::OpenMPDeclarativeAllocate &c) {
            return llvm::omp::OMPD_allocate;
          },
          [](const parser::OpenMPDispatchConstruct &c) {
            return llvm::omp::OMPD_dispatch;
          },
          [](const parser::OpenMPExecutableAllocate &c) {
            return llvm::omp::OMPD_allocate;
          },
          [](const parser::OpenMPLoopConstruct &c) {
            return std::get<parser::OmpLoopDirective>(
                       std::get<parser::OmpBeginLoopDirective>(c.t).t)
                .v;
          },
          [](const parser::OpenMPSectionConstruct &c) {
            return llvm::omp::OMPD_section;
          },
          [](const parser::OpenMPSectionsConstruct &c) {
            return std::get<parser::OmpSectionsDirective>(
                       std::get<parser::OmpBeginSectionsDirective>(c.t).t)
                .v;
          },
          // Standalone constructs require a nested visit over their own
          // variant of directive kinds.
          [](const parser::OpenMPStandaloneConstruct &c) {
            return common::visit(
                common::visitors{
                    [](const parser::OpenMPSimpleStandaloneConstruct &c) {
                      return c.v.DirId();
                    },
                    [](const parser::OpenMPFlushConstruct &c) {
                      return llvm::omp::OMPD_flush;
                    },
                    [](const parser::OpenMPCancelConstruct &c) {
                      return llvm::omp::OMPD_cancel;
                    },
                    [](const parser::OpenMPCancellationPointConstruct &c) {
                      return llvm::omp::OMPD_cancellation_point;
                    },
                    [](const parser::OmpMetadirectiveDirective &c) {
                      return llvm::omp::OMPD_metadirective;
                    },
                    [](const parser::OpenMPDepobjConstruct &c) {
                      return llvm::omp::OMPD_depobj;
                    },
                    [](const parser::OpenMPInteropConstruct &c) {
                      return llvm::omp::OMPD_interop;
                    }},
                c.u);
          },
          [](const parser::OpenMPUtilityConstruct &c) {
            return common::visit(
                common::visitors{[](const parser::OmpErrorDirective &c) {
                                   return llvm::omp::OMPD_error;
                                 },
                                 [](const parser::OmpNothingDirective &c) {
                                   return llvm::omp::OMPD_nothing;
                                 }},
                c.u);
          }},
      ompConstruct.u);
}
|
|
|
|
/// Populate the global \see hostEvalInfo after processing clauses for the given
|
|
/// \p eval OpenMP target construct, or nested constructs, if these must be
|
|
/// evaluated outside of the target region per the spec.
|
|
///
|
|
/// In particular, this will ensure that in 'target teams' and equivalent nested
|
|
/// constructs, the \c thread_limit and \c num_teams clauses will be evaluated
|
|
/// in the host. Additionally, loop bounds, steps and the \c num_threads clause
|
|
/// will also be evaluated in the host if a target SPMD construct is detected
|
|
/// (i.e. 'target teams distribute parallel do [simd]' or equivalent nesting).
|
|
///
|
|
/// The result, stored as a global, is intended to be used to populate the \c
|
|
/// host_eval operands of the associated \c omp.target operation, and also to be
|
|
/// checked and used by later lowering steps to populate the corresponding
|
|
/// operands of the \c omp.teams, \c omp.parallel or \c omp.loop_nest
|
|
/// operations.
|
|
static void processHostEvalClauses(lower::AbstractConverter &converter,
                                   semantics::SemanticsContext &semaCtx,
                                   lower::StatementContext &stmtCtx,
                                   lower::pft::Evaluation &eval,
                                   mlir::Location loc) {
  // Obtain the list of clauses of the given OpenMP block or loop construct
  // evaluation. Other evaluations passed to this lambda keep `clauses`
  // unchanged.
  auto extractClauses = [&semaCtx](lower::pft::Evaluation &eval,
                                   List<Clause> &clauses) {
    const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
    if (!ompEval)
      return;

    const parser::OmpClauseList *beginClauseList = nullptr;
    const parser::OmpClauseList *endClauseList = nullptr;
    common::visit(
        common::visitors{
            [&](const parser::OpenMPBlockConstruct &ompConstruct) {
              // Block constructs always have both begin and end directives.
              const auto &beginDirective =
                  std::get<parser::OmpBeginBlockDirective>(ompConstruct.t);
              beginClauseList =
                  &std::get<parser::OmpClauseList>(beginDirective.t);
              endClauseList = &std::get<parser::OmpClauseList>(
                  std::get<parser::OmpEndBlockDirective>(ompConstruct.t).t);
            },
            [&](const parser::OpenMPLoopConstruct &ompConstruct) {
              // Loop constructs may omit the end directive.
              const auto &beginDirective =
                  std::get<parser::OmpBeginLoopDirective>(ompConstruct.t);
              beginClauseList =
                  &std::get<parser::OmpClauseList>(beginDirective.t);

              if (auto &endDirective =
                      std::get<std::optional<parser::OmpEndLoopDirective>>(
                          ompConstruct.t))
                endClauseList =
                    &std::get<parser::OmpClauseList>(endDirective->t);
            },
            [&](const auto &) {}},
        ompEval->u);

    assert(beginClauseList && "expected begin directive");
    clauses.append(makeClauses(*beginClauseList, semaCtx));

    if (endClauseList)
      clauses.append(makeClauses(*endClauseList, semaCtx));
  };

  // Return the directive that is immediately nested inside of the given
  // `parent` evaluation, if it is its only non-end-statement nested evaluation
  // and it represents an OpenMP construct.
  auto extractOnlyOmpNestedDir = [](lower::pft::Evaluation &parent)
      -> std::optional<llvm::omp::Directive> {
    if (!parent.hasNestedEvaluations())
      return std::nullopt;

    llvm::omp::Directive dir;
    auto &nested = parent.getFirstNestedEvaluation();
    if (const auto *ompEval = nested.getIf<parser::OpenMPConstruct>())
      dir = extractOmpDirective(*ompEval);
    else
      return std::nullopt;

    // Any other non-end-statement sibling disqualifies the nested construct.
    for (auto &sibling : parent.getNestedEvaluations())
      if (&sibling != &nested && !sibling.isEndStmt())
        return std::nullopt;

    return dir;
  };

  // Process the given evaluation assuming it's part of a 'target' construct or
  // captured by one, and store results in the global `hostEvalInfo`.
  std::function<void(lower::pft::Evaluation &, const List<Clause> &)>
      processEval;
  processEval = [&](lower::pft::Evaluation &eval, const List<Clause> &clauses) {
    using namespace llvm::omp;
    ClauseProcessor cp(converter, semaCtx, clauses);

    // Call `processEval` recursively with the immediately nested evaluation and
    // its corresponding clauses if there is a single nested evaluation
    // representing an OpenMP directive that passes the given test.
    auto processSingleNestedIf = [&](llvm::function_ref<bool(Directive)> test) {
      std::optional<Directive> nestedDir = extractOnlyOmpNestedDir(eval);
      if (!nestedDir || !test(*nestedDir))
        return;

      lower::pft::Evaluation &nestedEval = eval.getFirstNestedEvaluation();
      List<lower::omp::Clause> nestedClauses;
      extractClauses(nestedEval, nestedClauses);
      processEval(nestedEval, nestedClauses);
    };

    const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
    if (!ompEval)
      return;

    HostEvalInfo &hostInfo = hostEvalInfo.back();

    // The [[fallthrough]] chains below mirror composite-construct structure:
    // each case processes the clauses it introduces, then falls into the
    // cases for the constructs it contains.
    switch (extractOmpDirective(*ompEval)) {
    case OMPD_teams_distribute_parallel_do:
    case OMPD_teams_distribute_parallel_do_simd:
      cp.processThreadLimit(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_target_teams_distribute_parallel_do:
    case OMPD_target_teams_distribute_parallel_do_simd:
      cp.processNumTeams(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_distribute_parallel_do:
    case OMPD_distribute_parallel_do_simd:
      cp.processNumThreads(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_distribute:
    case OMPD_distribute_simd:
      cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
      break;

    case OMPD_teams:
      cp.processThreadLimit(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_target_teams:
      cp.processNumTeams(stmtCtx, hostInfo.ops);
      // Recurse into a single nested distribute/loop construct, if present.
      processSingleNestedIf([](Directive nestedDir) {
        return topDistributeSet.test(nestedDir) || topLoopSet.test(nestedDir);
      });
      break;

    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
      cp.processThreadLimit(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
      cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
      cp.processNumTeams(stmtCtx, hostInfo.ops);
      break;

    case OMPD_teams_loop:
      cp.processThreadLimit(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_target_teams_loop:
      cp.processNumTeams(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_loop:
      cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
      break;

    // Standalone 'target' case.
    case OMPD_target: {
      processSingleNestedIf(
          [](Directive nestedDir) { return topTeamsSet.test(nestedDir); });
      break;
    }
    default:
      break;
    }
  };

  assert(!hostEvalInfo.empty() && "expected HOST_EVAL info structure");

  const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
  assert(ompEval &&
         llvm::omp::allTargetSet.test(extractOmpDirective(*ompEval)) &&
         "expected TARGET construct evaluation");
  (void)ompEval;

  // Use the whole list of clauses passed to the construct here, rather than the
  // ones only applied to omp.target.
  List<lower::omp::Clause> clauses;
  extractClauses(eval, clauses);
  processEval(eval, clauses);
}
|
|
|
|
/// Return the Evaluation of the innermost collapsed loop, or \p eval itself
/// when there was no COLLAPSE (\p collapseValue == 0).
static lower::pft::Evaluation *
getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) {
  // Without COLLAPSE, the given evaluation already is the loop of interest.
  if (collapseValue == 0)
    return &eval;

  // Walk down the loop nest. The nested evaluations should be DoConstructs,
  // each a tuple <NonLabelDoStmt, Block, EndDoStmt>, so the next inner loop
  // is the second nested evaluation.
  lower::pft::Evaluation *inner = &eval.getFirstNestedEvaluation();
  int remaining = collapseValue - 1;
  while (remaining-- > 0) {
    assert(inner->isA<parser::DoConstruct>());
    inner = &*std::next(inner->getNestedEvaluations().begin());
  }
  return inner;
}
|
|
|
|
/// Lower every evaluation nested inside \p eval (or, when \p collapseValue is
/// nonzero, inside the innermost collapsed loop).
static void genNestedEvaluations(lower::AbstractConverter &converter,
                                 lower::pft::Evaluation &eval,
                                 int collapseValue = 0) {
  // Skip past any collapsed loop headers before lowering the body.
  lower::pft::Evaluation *bodyEval = getCollapsedLoopEval(eval, collapseValue);

  for (lower::pft::Evaluation &nested : bodyEval->getNestedEvaluations())
    converter.genEval(nested);
}
|
|
|
|
/// Define the global for \p sym using the same initialization logic as base
/// language lowering (see Fortran::lower::defineGlobal), so types with length
/// parameters are handled correctly.
///
/// \param converter       - PFT to MLIR conversion interface.
/// \param firOpBuilder    - builder used to create the linkage attribute.
/// \param sym             - symbol to mangle into the global's name.
/// \param var             - variable whose global definition is created.
/// \param currentLocation - unused; kept for interface compatibility.
static fir::GlobalOp
globalInitialization(lower::AbstractConverter &converter,
                     fir::FirOpBuilder &firOpBuilder,
                     const semantics::Symbol &sym,
                     const lower::pft::Variable &var,
                     [[maybe_unused]] mlir::Location currentLocation) {
  // Mangle to a unique global name and define it with internal linkage; the
  // initializer itself is produced by the shared defineGlobal() logic.
  std::string globalName = converter.mangleName(sym);
  mlir::StringAttr linkage = firOpBuilder.createInternalLinkage();
  return Fortran::lower::defineGlobal(converter, var, globalName, linkage);
}
|
|
|
|
// Get the extended value for \p val by extracting additional variable
|
|
// information from \p base.
|
|
static fir::ExtendedValue getExtendedValue(fir::ExtendedValue base,
                                           mlir::Value val) {
  return base.match(
      [&](const fir::MutableBoxValue &mutBox) -> fir::ExtendedValue {
        // Keep the non-deferred length parameters of the original
        // allocatable/pointer while substituting the new address.
        return fir::MutableBoxValue(val, mutBox.nonDeferredLenParams(),
                                    /*mutableProperties=*/{});
      },
      [&](const auto &other) -> fir::ExtendedValue {
        // Every other kind of extended value: just swap in the new base.
        return fir::substBase(base, val);
      });
}
|
|
|
|
#ifndef NDEBUG
|
|
static bool isThreadPrivate(lower::SymbolRef sym) {
|
|
if (const auto *details = sym->detailsIf<semantics::CommonBlockDetails>()) {
|
|
for (const auto &obj : details->objects())
|
|
if (!obj->test(semantics::Symbol::Flag::OmpThreadprivate))
|
|
return false;
|
|
return true;
|
|
}
|
|
return sym->test(semantics::Symbol::Flag::OmpThreadprivate);
|
|
}
|
|
#endif
|
|
|
|
/// Create omp.threadprivate copies for every threadprivate symbol used inside
/// \p eval and rebind those symbols to the copies, so references inside the
/// region use the per-thread storage.
static void threadPrivatizeVars(lower::AbstractConverter &converter,
                                lower::pft::Evaluation &eval) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  mlir::Location currentLocation = converter.getCurrentLocation();
  // All new ops are emitted at the start of the region's alloca block; the
  // guard restores the previous insertion point on exit.
  mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
  firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());

  // If the symbol corresponds to the original ThreadprivateOp, use the symbol
  // value from that operation to create one ThreadprivateOp copy operation
  // inside the parallel region.
  // In some cases, however, the symbol will correspond to the original,
  // non-threadprivate variable. This can happen, for instance, with a common
  // block, declared in a separate module, used by a parent procedure and
  // privatized in its child procedure.
  auto genThreadprivateOp = [&](lower::SymbolRef sym) -> mlir::Value {
    assert(isThreadPrivate(sym));
    mlir::Value symValue = converter.getSymbolAddress(sym);
    mlir::Operation *op = symValue.getDefiningOp();
    // Look through an hlfir.declare wrapping the address, if present.
    if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(op))
      op = declOp.getMemref().getDefiningOp();
    if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
      symValue = mlir::dyn_cast<mlir::omp::ThreadprivateOp>(op).getSymAddr();
    return firOpBuilder.create<mlir::omp::ThreadprivateOp>(
        currentLocation, symValue.getType(), symValue);
  };

  llvm::SetVector<const semantics::Symbol *> threadprivateSyms;
  converter.collectSymbolSet(eval, threadprivateSyms,
                             semantics::Symbol::Flag::OmpThreadprivate,
                             /*collectSymbols=*/true,
                             /*collectHostAssociatedSymbols=*/true);
  std::set<semantics::SourceName> threadprivateSymNames;

  // For a COMMON block, the ThreadprivateOp is generated for itself instead of
  // its members, so only bind the value of the new copied ThreadprivateOp
  // inside the parallel region to the common block symbol only once for
  // multiple members in one COMMON block.
  llvm::SetVector<const semantics::Symbol *> commonSyms;
  for (std::size_t i = 0; i < threadprivateSyms.size(); i++) {
    const semantics::Symbol *sym = threadprivateSyms[i];
    mlir::Value symThreadprivateValue;
    // The variable may be used more than once, and each reference has one
    // symbol with the same name. Only do once for references of one variable.
    if (threadprivateSymNames.find(sym->name()) != threadprivateSymNames.end())
      continue;
    threadprivateSymNames.insert(sym->name());
    if (const semantics::Symbol *common =
            semantics::FindCommonBlockContaining(sym->GetUltimate())) {
      // Member of a COMMON block: privatize the block itself (once) and then
      // address the member inside the privatized block.
      mlir::Value commonThreadprivateValue;
      if (commonSyms.contains(common)) {
        commonThreadprivateValue = converter.getSymbolAddress(*common);
      } else {
        commonThreadprivateValue = genThreadprivateOp(*common);
        converter.bindSymbol(*common, commonThreadprivateValue);
        commonSyms.insert(common);
      }
      symThreadprivateValue = lower::genCommonBlockMember(
          converter, currentLocation, sym->GetUltimate(),
          commonThreadprivateValue);
    } else {
      symThreadprivateValue = genThreadprivateOp(*sym);
    }

    // Rebind the symbol to the per-thread copy, preserving the shape of its
    // original extended value.
    fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*sym);
    fir::ExtendedValue symThreadprivateExv =
        getExtendedValue(sexv, symThreadprivateValue);
    converter.bindSymbol(*sym, symThreadprivateExv);
  }
}
|
|
|
|
/// Convert a raw loop index value to the loop variable's type and assign it to
/// the (already privatized) loop variable \p sym. Returns the generated
/// hlfir.assign operation.
static mlir::Operation *
createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
                              mlir::Location loc, mlir::Value indexVal,
                              const semantics::Symbol *sym) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();

  // Query the symbol's type with the insertion point parked at the start of
  // the alloca block, then restore it before emitting any operations at the
  // current position.
  mlir::OpBuilder::InsertPoint savedIP = builder.saveInsertionPoint();
  builder.setInsertionPointToStart(builder.getAllocaBlock());
  mlir::Type loopVarTy = converter.genType(*sym);
  assert(converter.isPresentShallowLookup(*sym) &&
         "Expected symbol to be in symbol table.");
  builder.restoreInsertionPoint(savedIP);

  // Cast the index to the variable's type and store it through the loop
  // variable, dereferencing pointers/allocatables first.
  mlir::Value converted = builder.createConvert(loc, loopVarTy, indexVal);
  hlfir::Entity target{converter.getSymbolAddress(*sym)};
  target = hlfir::derefPointersAndAllocatables(loc, builder, target);
  return builder.create<hlfir::AssignOp>(loc, converted, target);
}
|
|
|
|
// This helper function implements the functionality of "promoting" non-CPTR
|
|
// arguments of use_device_ptr to use_device_addr arguments (automagic
|
|
// conversion of use_device_ptr -> use_device_addr in these cases). The way we
|
|
// do so currently is through the shuffling of operands from the
|
|
// devicePtrOperands to deviceAddrOperands, as well as the types, locations and
|
|
// symbols.
|
|
//
|
|
// This effectively implements some deprecated OpenMP functionality that some
|
|
// legacy applications unfortunately depend on (deprecated in specification
|
|
// version 5.2):
|
|
//
|
|
// "If a list item in a use_device_ptr clause is not of type C_PTR, the behavior
|
|
// is as if the list item appeared in a use_device_addr clause. Support for
|
|
// such list items in a use_device_ptr clause is deprecated."
|
|
static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
|
|
llvm::SmallVectorImpl<mlir::Value> &useDeviceAddrVars,
|
|
llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceAddrSyms,
|
|
llvm::SmallVectorImpl<mlir::Value> &useDevicePtrVars,
|
|
llvm::SmallVectorImpl<const semantics::Symbol *> &useDevicePtrSyms) {
|
|
// Iterate over our use_device_ptr list and shift all non-cptr arguments into
|
|
// use_device_addr.
|
|
auto *varIt = useDevicePtrVars.begin();
|
|
auto *symIt = useDevicePtrSyms.begin();
|
|
while (varIt != useDevicePtrVars.end()) {
|
|
if (fir::isa_builtin_cptr_type(fir::unwrapRefType(varIt->getType()))) {
|
|
++varIt;
|
|
++symIt;
|
|
continue;
|
|
}
|
|
|
|
useDeviceAddrVars.push_back(*varIt);
|
|
useDeviceAddrSyms.push_back(*symIt);
|
|
|
|
varIt = useDevicePtrVars.erase(varIt);
|
|
symIt = useDevicePtrSyms.erase(symIt);
|
|
}
|
|
}
|
|
|
|
/// Extract the list of function and variable symbols affected by the given
/// 'declare target' directive and return the intended device type for them.
///
/// \param [in]  converter - PFT to MLIR conversion interface.
/// \param [in]  semaCtx - semantics context.
/// \param [in]  eval - current PFT node/evaluation.
/// \param [in]  declareTargetConstruct - the parsed directive.
/// \param [out] clauseOps - receives the device type from clause processing.
/// \param [out] symbolAndClause - receives (capture clause, symbol) pairs for
///              every function/variable named or implied by the directive.
static void getDeclareTargetInfo(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval,
    const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
    mlir::omp::DeclareTargetOperands &clauseOps,
    llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
  const auto &spec =
      std::get<parser::OmpDeclareTargetSpecifier>(declareTargetConstruct.t);
  if (const auto *objectList{parser::Unwrap<parser::OmpObjectList>(spec.u)}) {
    ObjectList objects{makeObjects(*objectList, semaCtx)};
    // Case: declare target(func, var1, var2)
    // A bare object list implies the 'to' capture clause for every item.
    gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to,
                         symbolAndClause);
  } else if (const auto *clauseList{
                 parser::Unwrap<parser::OmpClauseList>(spec.u)}) {
    List<Clause> clauses = makeClauses(*clauseList, semaCtx);
    if (clauses.empty()) {
      Fortran::lower::pft::FunctionLikeUnit *owningProc =
          eval.getOwningProcedure();
      // An anonymous main program has no symbol to capture, so it is skipped.
      if (owningProc && (!owningProc->isMainProgram() ||
                         owningProc->getMainProgramSymbol())) {
        // Case: declare target, implicit capture of function
        symbolAndClause.emplace_back(mlir::omp::DeclareTargetCaptureClause::to,
                                     owningProc->getSubprogramSymbol());
      }
    }

    // Collect the device type and the symbols named by the enter/link/to
    // clauses attached to the directive.
    ClauseProcessor cp(converter, semaCtx, clauses);
    cp.processDeviceType(clauseOps);
    cp.processEnter(symbolAndClause);
    cp.processLink(symbolAndClause);
    cp.processTo(symbolAndClause);

    // The INDIRECT clause is not supported yet; report it as a TODO.
    cp.processTODO<clause::Indirect>(converter.getCurrentLocation(),
                                     llvm::omp::Directive::OMPD_declare_target);
  }
}
|
|
|
|
/// Collect 'declare target' symbols whose MLIR operations do not exist yet in
/// the module, so that marking them can be deferred until after their
/// operations have been created.
static void collectDeferredDeclareTargets(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval,
    const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
    llvm::SmallVectorImpl<lower::OMPDeferredDeclareTargetInfo>
        &deferredDeclareTarget) {
  mlir::omp::DeclareTargetOperands clauseOps;
  llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
  getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
                       clauseOps, symbolAndClause);

  mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
  for (const DeclareTargetCapturePair &pair : symbolAndClause) {
    const semantics::Symbol &sym = std::get<const semantics::Symbol &>(pair);
    // If the mangled symbol cannot be found in the module, its operation has
    // not been created yet: remember it so it can be marked later.
    if (!mod.lookupSymbol(converter.mangleName(sym)))
      deferredDeclareTarget.push_back(
          {std::get<0>(pair), clauseOps.deviceType, std::get<1>(pair)});
  }
}
|
|
|
|
/// Return the 'declare target' device type if at least one of the directive's
/// targets is a function or subroutine; std::nullopt otherwise.
static std::optional<mlir::omp::DeclareTargetDeviceType>
getDeclareTargetFunctionDevice(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval,
    const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
  mlir::omp::DeclareTargetOperands clauseOps;
  llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
  getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
                       clauseOps, symbolAndClause);

  // Look for any target that lowered to a func.func operation; only then is
  // the device type relevant to the caller.
  mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
  for (const DeclareTargetCapturePair &pair : symbolAndClause) {
    const semantics::Symbol &sym = std::get<const semantics::Symbol &>(pair);
    mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym));
    if (mlir::isa_and_nonnull<mlir::func::FuncOp>(op))
      return clauseOps.deviceType;
  }

  return std::nullopt;
}
|
|
|
|
/// Set up the entry block of the given `omp.loop_nest` operation, adding a
/// block argument for each loop induction variable and allocating and
/// initializing a private value to hold each of them.
///
/// This function can also bind the symbols of any variables that should match
/// block arguments on parent loop wrapper operations attached to the same
/// loop. This allows the introduction of any necessary `hlfir.declare`
/// operations inside of the entry block of the `omp.loop_nest` operation and
/// not directly under any of the wrappers, which would invalidate them.
///
/// \param [in] op - the loop nest operation.
/// \param [in] converter - PFT to MLIR conversion interface.
/// \param [in] loc - location.
/// \param [in] args - symbols of induction variables.
/// \param [in] wrapperArgs - list of parent loop wrappers and their associated
///                           entry block arguments.
static void genLoopVars(
    mlir::Operation *op, lower::AbstractConverter &converter,
    mlir::Location &loc, llvm::ArrayRef<const semantics::Symbol *> args,
    llvm::ArrayRef<
        std::pair<mlir::omp::BlockArgOpenMPOpInterface, const EntryBlockArgs &>>
        wrapperArgs = {}) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  auto &region = op->getRegion(0);

  // Use a loop-variable type wide enough for the largest induction variable.
  std::size_t loopVarTypeSize = 0;
  for (const semantics::Symbol *arg : args)
    loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
  mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
  llvm::SmallVector<mlir::Type> tiv(args.size(), loopVarType);
  llvm::SmallVector<mlir::Location> locs(args.size(), loc);
  firOpBuilder.createBlock(&region, {}, tiv, locs);

  // Update nested wrapper operands if parent wrappers have mapped these values
  // to block arguments.
  //
  // Binding these values earlier would take care of this, but we cannot rely on
  // that approach because binding in between the creation of a wrapper and the
  // next one would result in 'hlfir.declare' operations being introduced inside
  // of a wrapper, which is illegal.
  mlir::IRMapping mapper;
  for (auto [argGeneratingOp, blockArgs] : wrapperArgs) {
    for (mlir::OpOperand &operand : argGeneratingOp->getOpOperands())
      operand.set(mapper.lookupOrDefault(operand.get()));

    for (const auto [arg, var] : llvm::zip_equal(
             argGeneratingOp->getRegion(0).getArguments(), blockArgs.getVars()))
      mapper.map(var, arg);
  }

  // Bind the entry block arguments of parent wrappers to the corresponding
  // symbols.
  for (auto [argGeneratingOp, blockArgs] : wrapperArgs)
    bindEntryBlockArgs(converter, argGeneratingOp, blockArgs);

  // The argument is not currently in memory, so make a temporary for the
  // argument, and store it there, then bind that location to the argument.
  mlir::Operation *storeOp = nullptr;
  for (auto [argIndex, argSymbol] : llvm::enumerate(args)) {
    mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex));
    storeOp =
        createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
  }
  // Leave the builder positioned after the last induction-variable store so
  // the loop body is generated after all privatized variables are set.
  firOpBuilder.setInsertionPointAfter(storeOp);
}
|
|
|
|
static void
|
|
markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter,
|
|
mlir::omp::DeclareTargetCaptureClause captureClause,
|
|
mlir::omp::DeclareTargetDeviceType deviceType) {
|
|
// TODO: Add support for program local variables with declare target applied
|
|
auto declareTargetOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(op);
|
|
if (!declareTargetOp)
|
|
fir::emitFatalError(
|
|
converter.getCurrentLocation(),
|
|
"Attempt to apply declare target on unsupported operation");
|
|
|
|
// The function or global already has a declare target applied to it, very
|
|
// likely through implicit capture (usage in another declare target
|
|
// function/subroutine). It should be marked as any if it has been assigned
|
|
// both host and nohost, else we skip, as there is no change
|
|
if (declareTargetOp.isDeclareTarget()) {
|
|
if (declareTargetOp.getDeclareTargetDeviceType() != deviceType)
|
|
declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any,
|
|
captureClause);
|
|
return;
|
|
}
|
|
|
|
declareTargetOp.setDeclareTarget(deviceType, captureClause);
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Op body generation helper structures and functions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Bundle of inputs controlling how the body of an OpenMP operation is
/// generated by \see createBodyOfOp. Optional settings are configured through
/// the chained `set*` methods.
struct OpWithBodyGenInfo {
  /// A type for a code-gen callback function. This takes as argument the op for
  /// which the code is being generated and returns the arguments of the op's
  /// region.
  using GenOMPRegionEntryCBFn =
      std::function<llvm::SmallVector<const semantics::Symbol *>(
          mlir::Operation *)>;

  OpWithBodyGenInfo(lower::AbstractConverter &converter,
                    lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx, mlir::Location loc,
                    lower::pft::Evaluation &eval, llvm::omp::Directive dir)
      : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
        eval(eval), dir(dir) {}

  /// Set the clause list to process (enables privatization in the body).
  OpWithBodyGenInfo &setClauses(const List<Clause> *value) {
    clauses = value;
    return *this;
  }

  /// Use an externally-owned data-sharing processor instead of a local one.
  OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) {
    dsp = value;
    return *this;
  }

  /// Provide clause-generated entry block arguments for the op's region.
  OpWithBodyGenInfo &setEntryBlockArgs(const EntryBlockArgs *value) {
    blockArgs = value;
    return *this;
  }

  /// Override entry block creation with a custom callback.
  OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) {
    genRegionEntryCB = value;
    return *this;
  }

  /// Skip generating nested evaluations / dispatching further leaf constructs.
  OpWithBodyGenInfo &setGenSkeletonOnly(bool value) {
    genSkeletonOnly = value;
    return *this;
  }

  /// [inout] converter to use for the clauses.
  lower::AbstractConverter &converter;
  /// [in] Symbol table
  lower::SymMap &symTable;
  /// [in] Semantics context
  semantics::SemanticsContext &semaCtx;
  /// [in] location in source code.
  mlir::Location loc;
  /// [in] current PFT node/evaluation.
  lower::pft::Evaluation &eval;
  /// [in] leaf directive for which to generate the op body.
  llvm::omp::Directive dir;
  /// [in] list of clauses to process.
  const List<Clause> *clauses = nullptr;
  /// [in] if provided, processes the construct's data-sharing attributes.
  DataSharingProcessor *dsp = nullptr;
  /// [in] if provided, it is used to create the op's region entry block. It is
  /// overridden when a \see genRegionEntryCB is provided. This is only valid
  /// for operations implementing the \see mlir::omp::BlockArgOpenMPOpInterface.
  const EntryBlockArgs *blockArgs = nullptr;
  /// [in] if provided, it overrides the default op's region entry block
  /// creation.
  GenOMPRegionEntryCBFn genRegionEntryCB = nullptr;
  /// [in] if set to `true`, skip generating nested evaluations and dispatching
  /// any further leaf constructs.
  bool genSkeletonOnly = false;
};
|
|
|
|
/// Create the body (block) for an OpenMP Operation.
///
/// \param [in] op - the operation the body belongs to.
/// \param [in] info - options controlling code-gen for the construction.
/// \param [in] queue - work queue with nested constructs.
/// \param [in] item - item in the queue to generate body for.
static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
                           const ConstructQueue &queue,
                           ConstructQueue::const_iterator item) {
  fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder();

  // Creates an unused fir.undef whose only purpose is to remember an
  // insertion position; it is erased before returning.
  auto insertMarker = [](fir::FirOpBuilder &builder) {
    mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(),
                                                     builder.getIndexType());
    return undef.getDefiningOp();
  };

  // Create the entry block for the region and collect its arguments for use
  // within the region. The entry block will be created as follows:
  // - By default, it will be empty and have no arguments.
  // - Operations implementing the omp::BlockArgOpenMPOpInterface can set the
  //   `info.blockArgs` pointer so that block arguments will be those
  //   corresponding to entry block argument-generating clauses. Binding of
  //   Fortran symbols to the new MLIR values is done automatically.
  // - If the `info.genRegionEntryCB` callback is set, it takes precedence and
  //   allows callers to manually create the entry block with its intended
  //   list of arguments and to bind these arguments to their corresponding
  //   Fortran symbols. This is used for e.g. loop induction variables.
  auto regionArgs = [&]() -> llvm::SmallVector<const semantics::Symbol *> {
    if (info.genRegionEntryCB)
      return info.genRegionEntryCB(&op);

    if (info.blockArgs) {
      genEntryBlock(firOpBuilder, *info.blockArgs, op.getRegion(0));
      bindEntryBlockArgs(info.converter,
                         llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op),
                         *info.blockArgs);
      return llvm::to_vector(info.blockArgs->getSyms());
    }

    firOpBuilder.createBlock(&op.getRegion(0));
    return {};
  }();

  // Mark the earliest insertion point.
  mlir::Operation *marker = insertMarker(firOpBuilder);

  // If it is an unstructured region, create empty blocks for all evaluations.
  if (lower::omp::isLastItemInQueue(item, queue) &&
      info.eval.lowerAsUnstructured()) {
    lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
        firOpBuilder, info.eval.getNestedEvaluations());
  }

  // Start with privatization, so that the lowering of the nested
  // code will use the right symbols.
  bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) ==
                llvm::omp::Association::Loop;
  bool privatize = info.clauses;

  firOpBuilder.setInsertionPoint(marker);
  std::optional<DataSharingProcessor> tempDsp;
  if (privatize && !info.dsp) {
    // No caller-provided data-sharing processor: create a local one for this
    // construct's clauses and run its first (allocation/copy-in) step.
    tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval,
                    Fortran::lower::omp::isLastItemInQueue(item, queue),
                    /*useDelayedPrivatization=*/false, info.symTable);
    tempDsp->processStep1();
  }

  if (info.dir == llvm::omp::Directive::OMPD_parallel) {
    threadPrivatizeVars(info.converter, info.eval);
    if (info.clauses) {
      firOpBuilder.setInsertionPoint(marker);
      ClauseProcessor(info.converter, info.semaCtx, *info.clauses)
          .processCopyin();
    }
  }

  if (!info.genSkeletonOnly) {
    if (ConstructQueue::const_iterator next = std::next(item);
        next != queue.end()) {
      genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval,
                     info.loc, queue, next);
    } else {
      // genFIR(Evaluation&) tries to patch up unterminated blocks, causing
      // a lot of complications for our approach if the terminator generation
      // is delayed past this point. Insert a temporary terminator here, then
      // delete it.
      firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back());
      auto *temp = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
      firOpBuilder.setInsertionPointAfter(marker);
      genNestedEvaluations(info.converter, info.eval);
      temp->erase();
    }
  }

  // Get or create a unique exiting block from the given region, or
  // return nullptr if there is no exiting block.
  auto getUniqueExit = [&](mlir::Region &region) -> mlir::Block * {
    // Find the blocks where the OMP terminator should go. In simple cases
    // it is the single block in the operation's region. When the region
    // is more complicated, especially with unstructured control flow, there
    // may be multiple blocks, and some of them may have non-OMP terminators
    // resulting from lowering of the code contained within the operation.
    // All the remaining blocks are potential exit points from the op's region.
    //
    // Explicit control flow cannot exit any OpenMP region (other than via
    // STOP), and that is enforced by semantic checks prior to lowering. STOP
    // statements are lowered to a function call.

    // Collect unterminated blocks.
    llvm::SmallVector<mlir::Block *> exits;
    for (mlir::Block &b : region) {
      if (b.empty() || !b.back().hasTrait<mlir::OpTrait::IsTerminator>())
        exits.push_back(&b);
    }

    if (exits.empty())
      return nullptr;
    // If there already is a unique exiting block, do not create another one.
    // Additionally, some ops (e.g. omp.sections) require only 1 block in
    // its region.
    if (exits.size() == 1)
      return exits[0];
    mlir::Block *exit = firOpBuilder.createBlock(&region);
    for (mlir::Block *b : exits) {
      firOpBuilder.setInsertionPointToEnd(b);
      firOpBuilder.create<mlir::cf::BranchOp>(info.loc, exit);
    }
    return exit;
  };

  if (auto *exitBlock = getUniqueExit(op.getRegion(0))) {
    firOpBuilder.setInsertionPointToEnd(exitBlock);
    auto *term = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
    // Only insert lastprivate code when there actually is an exit block.
    // Such a block may not exist if the nested code produced an infinite
    // loop (this may not make sense in production code, but a user could
    // write that and we should handle it).
    firOpBuilder.setInsertionPoint(term);
    if (privatize) {
      // DataSharingProcessor::processStep2() may create operations before/after
      // the one passed as argument. We need to treat loop wrappers and their
      // nested loop as a unit, so we need to pass the bottom level wrapper (if
      // present). Otherwise, these operations will be inserted within a
      // wrapper region.
      mlir::Operation *privatizationBottomLevelOp = &op;
      if (auto loopNest = llvm::dyn_cast<mlir::omp::LoopNestOp>(op)) {
        llvm::SmallVector<mlir::omp::LoopWrapperInterface> wrappers;
        loopNest.gatherWrappers(wrappers);
        if (!wrappers.empty())
          privatizationBottomLevelOp = &*wrappers.front();
      }

      if (!info.dsp) {
        assert(tempDsp.has_value());
        tempDsp->processStep2(privatizationBottomLevelOp, isLoop);
      } else {
        // For loops, give the shared processor the induction variable
        // addresses before running the second (lastprivate) step.
        if (isLoop && regionArgs.size() > 0) {
          for (const auto &regionArg : regionArgs) {
            info.dsp->pushLoopIV(info.converter.getSymbolAddress(*regionArg));
          }
        }
        info.dsp->processStep2(privatizationBottomLevelOp, isLoop);
      }
    }
  }

  // Leave the builder after the marker's position, then drop the marker.
  firOpBuilder.setInsertionPointAfter(marker);
  marker->erase();
}
|
|
|
|
/// Generate the body region of an `omp.target_data` operation: create its
/// entry block, bind the clause-generated block arguments to their Fortran
/// symbols, and lower the nested constructs/evaluations into the region.
static void genBodyOfTargetDataOp(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    mlir::omp::TargetDataOp &dataOp, const EntryBlockArgs &args,
    const mlir::Location &currentLocation, const ConstructQueue &queue,
    ConstructQueue::const_iterator item) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

  genEntryBlock(firOpBuilder, args, dataOp.getRegion());
  bindEntryBlockArgs(converter, dataOp, args);

  // Insert dummy instruction to remember the insertion position. The
  // marker will be deleted by clean up passes since there are no uses.
  // Remembering the position for further insertion is important since
  // there are hlfir.declares inserted above while setting block arguments
  // and new code from the body should be inserted after that.
  mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
      dataOp.getLoc(), firOpBuilder.getIndexType());

  // Create blocks for unstructured regions. This has to be done since
  // blocks are initially allocated with the function as the parent region.
  if (eval.lowerAsUnstructured()) {
    lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
        firOpBuilder, eval.getNestedEvaluations());
  }

  firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);

  // Set the insertion point after the marker.
  firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());

  // Lower the remaining construct queue items, or the nested evaluations when
  // this is the last item in the queue.
  if (ConstructQueue::const_iterator next = std::next(item);
      next != queue.end()) {
    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                   next);
  } else {
    genNestedEvaluations(converter, eval);
  }
}
|
|
|
|
// This generates intermediate common block member accesses within a region
|
|
// and then rebinds the members symbol to the intermediate accessors we have
|
|
// generated so that subsequent code generation will utilise these instead.
|
|
//
|
|
// When the scope changes, the bindings to the intermediate accessors should
|
|
// be dropped in place of the original symbol bindings.
|
|
//
|
|
// This is for utilisation with TargetOp.
|
|
static void genIntermediateCommonBlockAccessors(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
const mlir::Location ¤tLocation,
|
|
llvm::ArrayRef<mlir::BlockArgument> mapBlockArgs,
|
|
llvm::ArrayRef<const Fortran::semantics::Symbol *> mapSyms) {
|
|
// Iterate over the symbol list, which will be shorter than the list of
|
|
// arguments if new entry block arguments were introduced to implicitly map
|
|
// outside values used by the bounds cloned into the target region. In that
|
|
// case, the additional block arguments do not need processing here.
|
|
for (auto [mapSym, mapArg] : llvm::zip_first(mapSyms, mapBlockArgs)) {
|
|
auto *details = mapSym->detailsIf<Fortran::semantics::CommonBlockDetails>();
|
|
if (!details)
|
|
continue;
|
|
|
|
for (auto obj : details->objects()) {
|
|
auto targetCBMemberBind = Fortran::lower::genCommonBlockMember(
|
|
converter, currentLocation, *obj, mapArg);
|
|
fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*obj);
|
|
fir::ExtendedValue targetCBExv =
|
|
getExtendedValue(sexv, targetCBMemberBind);
|
|
converter.bindSymbol(*obj, targetCBExv);
|
|
}
|
|
}
|
|
}
|
|
|
|
// This function creates a block for the body of the targetOp's region. It adds
// all the symbols present in mapSymbols as block arguments to this block.
static void genBodyOfTargetOp(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    mlir::omp::TargetOp &targetOp, const EntryBlockArgs &args,
    const mlir::Location &currentLocation, const ConstructQueue &queue,
    ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp);

  mlir::Region &region = targetOp.getRegion();
  mlir::Block *entryBlock = genEntryBlock(firOpBuilder, args, region);
  bindEntryBlockArgs(converter, targetOp, args);
  if (!hostEvalInfo.empty())
    hostEvalInfo.back().bindOperands(argIface.getHostEvalBlockArgs());

  // Check if cloning the bounds introduced any dependency on the outer region.
  // If so, then either clone them as well if they are MemoryEffectFree, or else
  // copy them to a new temporary and add them to the map and block_argument
  // lists and replace their uses with the new temporary.
  //
  // This runs as a fixed-point loop: cloning/copying can itself introduce new
  // dependencies on values defined above, so iterate until none remain.
  llvm::SetVector<mlir::Value> valuesDefinedAbove;
  mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
  while (!valuesDefinedAbove.empty()) {
    for (mlir::Value val : valuesDefinedAbove) {
      mlir::Operation *valOp = val.getDefiningOp();
      assert(valOp != nullptr);

      // NOTE: We skip BoxDimsOp's as the lesser of two evils is to map the
      // indices separately, as the alternative is to eventually map the Box,
      // which comes with a fairly large overhead comparatively. We could be
      // more robust about this and check using a BackwardsSlice to see if we
      // run the risk of mapping a box.
      if (mlir::isMemoryEffectFree(valOp) &&
          !mlir::isa<fir::BoxDimsOp>(valOp)) {
        // Side-effect-free op: clone it into the region's entry block and
        // redirect only the uses inside that block to the clone.
        mlir::Operation *clonedOp = valOp->clone();
        entryBlock->push_front(clonedOp);

        auto replace = [entryBlock](mlir::OpOperand &use) {
          return use.getOwner()->getBlock() == entryBlock;
        };

        valOp->getResults().replaceUsesWithIf(clonedOp->getResults(), replace);
        valOp->replaceUsesWithIf(clonedOp, replace);
      } else {
        // Op has effects: copy the value to a temporary outside the region,
        // map that temporary into the target, and load it inside the region.
        auto savedIP = firOpBuilder.getInsertionPoint();
        firOpBuilder.setInsertionPointAfter(valOp);
        auto copyVal =
            firOpBuilder.createTemporary(val.getLoc(), val.getType());
        firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);

        fir::factory::AddrAndBoundsInfo info =
            fir::factory::getDataOperandBaseAddr(
                firOpBuilder, val, /*isOptional=*/false, val.getLoc());
        llvm::SmallVector<mlir::Value> bounds =
            fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
                                               mlir::omp::MapBoundsType>(
                firOpBuilder, info,
                hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder,
                                                hlfir::Entity{val})
                    .first,
                /*dataExvIsAssumedSize=*/false, val.getLoc());

        std::stringstream name;
        firOpBuilder.setInsertionPoint(targetOp);

        // Trivial/char values are captured by copy; everything else (except
        // C_PTR) is mapped "to" the device by reference.
        llvm::omp::OpenMPOffloadMappingFlags mapFlag =
            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
        mlir::omp::VariableCaptureKind captureKind =
            mlir::omp::VariableCaptureKind::ByRef;

        mlir::Type eleType = copyVal.getType();
        if (auto refType =
                mlir::dyn_cast<fir::ReferenceType>(copyVal.getType()))
          eleType = refType.getElementType();

        if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
          captureKind = mlir::omp::VariableCaptureKind::ByCopy;
        } else if (!fir::isa_builtin_cptr_type(eleType)) {
          mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
        }

        mlir::Value mapOp = createMapInfoOp(
            firOpBuilder, copyVal.getLoc(), copyVal,
            /*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
            /*members=*/llvm::SmallVector<mlir::Value>{},
            /*membersIndex=*/mlir::ArrayAttr{},
            static_cast<
                std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
                mapFlag),
            captureKind, copyVal.getType());

        // Get the index of the first non-map argument before modifying mapVars,
        // then append an element to mapVars and an associated entry block
        // argument at that index.
        unsigned insertIndex =
            argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs();
        targetOp.getMapVarsMutable().append(mapOp);
        mlir::Value clonedValArg = region.insertArgument(
            insertIndex, copyVal.getType(), copyVal.getLoc());

        firOpBuilder.setInsertionPointToStart(entryBlock);
        auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(),
                                                       clonedValArg);
        val.replaceUsesWithIf(loadOp->getResult(0),
                              [entryBlock](mlir::OpOperand &use) {
                                return use.getOwner()->getBlock() == entryBlock;
                              });
        firOpBuilder.setInsertionPoint(entryBlock, savedIP);
      }
    }
    valuesDefinedAbove.clear();
    mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
  }

  // Insert dummy instruction to remember the insertion position. The
  // marker will be deleted since there are not uses.
  // In the HLFIR flow there are hlfir.declares inserted above while
  // setting block arguments.
  mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
      targetOp.getLoc(), firOpBuilder.getIndexType());

  // Create blocks for unstructured regions. This has to be done since
  // blocks are initially allocated with the function as the parent region.
  if (lower::omp::isLastItemInQueue(item, queue) &&
      eval.lowerAsUnstructured()) {
    lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
        firOpBuilder, eval.getNestedEvaluations());
  }

  firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);

  // Create the insertion point after the marker.
  firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());

  // If we map a common block using its symbol e.g. map(tofrom: /common_block/)
  // and access its members within the target region, there is a large chance
  // we will end up with uses external to the region accessing the common
  // block. To resolve these, we generate new common block member accesses
  // within the region, binding them to the member symbol for the scope of the
  // region so that subsequent code generation within the region will utilise
  // our new member accesses we have created.
  genIntermediateCommonBlockAccessors(
      converter, currentLocation, argIface.getMapBlockArgs(), args.map.syms);

  if (ConstructQueue::const_iterator next = std::next(item);
      next != queue.end()) {
    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                   next);
  } else {
    genNestedEvaluations(converter, eval);
  }

  dsp.processStep2(targetOp, /*isLoop=*/false);
}
|
|
|
|
template <typename OpTy, typename... Args>
|
|
static OpTy genOpWithBody(const OpWithBodyGenInfo &info,
|
|
const ConstructQueue &queue,
|
|
ConstructQueue::const_iterator item, Args &&...args) {
|
|
auto op = info.converter.getFirOpBuilder().create<OpTy>(
|
|
info.loc, std::forward<Args>(args)...);
|
|
createBodyOfOp(*op, info, queue, item);
|
|
return op;
|
|
}
|
|
|
|
template <typename OpTy, typename ClauseOpsTy>
|
|
static OpTy genWrapperOp(lower::AbstractConverter &converter,
|
|
mlir::Location loc, const ClauseOpsTy &clauseOps,
|
|
const EntryBlockArgs &args) {
|
|
static_assert(
|
|
OpTy::template hasTrait<mlir::omp::LoopWrapperInterface::Trait>(),
|
|
"expected a loop wrapper");
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
|
|
// Create wrapper.
|
|
auto op = firOpBuilder.create<OpTy>(loc, clauseOps);
|
|
|
|
// Create entry block with arguments.
|
|
genEntryBlock(firOpBuilder, args, op.getRegion());
|
|
|
|
return op;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation functions for clauses
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Process the clauses of a CANCEL construct into `clauseOps`.
static void genCancelClauses(lower::AbstractConverter &converter,
                             semantics::SemanticsContext &semaCtx,
                             const List<Clause> &clauses, mlir::Location loc,
                             mlir::omp::CancelOperands &clauseOps) {
  // Record which construct is being cancelled and any IF condition.
  ClauseProcessor processor(converter, semaCtx, clauses);
  processor.processCancelDirectiveName(clauseOps);
  processor.processIf(llvm::omp::Directive::OMPD_cancel, clauseOps);
}
|
|
|
|
static void
|
|
genCancellationPointClauses(lower::AbstractConverter &converter,
|
|
semantics::SemanticsContext &semaCtx,
|
|
const List<Clause> &clauses, mlir::Location loc,
|
|
mlir::omp::CancellationPointOperands &clauseOps) {
|
|
ClauseProcessor cp(converter, semaCtx, clauses);
|
|
cp.processCancelDirectiveName(clauseOps);
|
|
}
|
|
|
|
/// Process the clauses of a CRITICAL declaration into `clauseOps`, attaching
/// the critical section's name.
static void genCriticalDeclareClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    const List<Clause> &clauses, mlir::Location loc,
    mlir::omp::CriticalDeclareOperands &clauseOps, llvm::StringRef name) {
  // HINT is the only clause lowered here; the name comes from the caller.
  ClauseProcessor processor(converter, semaCtx, clauses);
  processor.processHint(clauseOps);
  clauseOps.symName =
      mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name);
}
|
|
|
|
/// Translate the clauses attached to a DISTRIBUTE construct (ALLOCATE,
/// DIST_SCHEDULE, ORDER) into MLIR operands.
static void genDistributeClauses(lower::AbstractConverter &converter,
                                 semantics::SemanticsContext &semaCtx,
                                 lower::StatementContext &stmtCtx,
                                 const List<Clause> &clauses,
                                 mlir::Location loc,
                                 mlir::omp::DistributeOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processAllocate(clauseOps);
  cp.processDistSchedule(stmtCtx, clauseOps);
  cp.processOrder(clauseOps);
}
|
|
|
|
/// Collect the operand list for a FLUSH construct from the flushed objects.
/// Memory-order clauses (ACQ_REL/ACQUIRE/RELEASE/SEQ_CST) are not supported
/// yet and trigger a TODO error.
static void genFlushClauses(lower::AbstractConverter &converter,
                            semantics::SemanticsContext &semaCtx,
                            const ObjectList &objects,
                            const List<Clause> &clauses, mlir::Location loc,
                            llvm::SmallVectorImpl<mlir::Value> &operandRange) {
  if (!objects.empty())
    genObjectList(objects, converter, operandRange);

  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processTODO<clause::AcqRel, clause::Acquire, clause::Release,
                 clause::SeqCst>(loc, llvm::omp::OMPD_flush);
}
|
|
|
|
/// Compute the loop bounds/steps and induction variables (\p iv) for an
/// `omp.loop_nest` from the COLLAPSE clause, unless a pending host_eval entry
/// already provides host-evaluated values for them.
static void
genLoopNestClauses(lower::AbstractConverter &converter,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval, const List<Clause> &clauses,
                   mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps,
                   llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
  ClauseProcessor cp(converter, semaCtx, clauses);

  // host_eval values (target regions compiled for the host) take precedence
  // over locally-evaluated collapse bounds.
  if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps, iv))
    cp.processCollapse(loc, eval, clauseOps, iv);

  clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr();
}
|
|
|
|
/// Translate the clauses attached to a LOOP construct (BIND, ORDER, REDUCTION)
/// into MLIR operands. LASTPRIVATE is not supported yet.
static void genLoopClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    const List<Clause> &clauses, mlir::Location loc,
    mlir::omp::LoopOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processBind(clauseOps);
  cp.processOrder(clauseOps);
  cp.processReduction(loc, clauseOps, reductionSyms);
  cp.processTODO<clause::Lastprivate>(loc, llvm::omp::Directive::OMPD_loop);
}
|
|
|
|
/// Translate the clauses attached to a MASKED construct (only FILTER) into
/// MLIR operands.
static void genMaskedClauses(lower::AbstractConverter &converter,
                             semantics::SemanticsContext &semaCtx,
                             lower::StatementContext &stmtCtx,
                             const List<Clause> &clauses, mlir::Location loc,
                             mlir::omp::MaskedOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processFilter(stmtCtx, clauseOps);
}
|
|
|
|
/// Translate the clauses attached to an ORDERED region. The SIMD clause is
/// not supported yet and triggers a TODO error.
static void
genOrderedRegionClauses(lower::AbstractConverter &converter,
                        semantics::SemanticsContext &semaCtx,
                        const List<Clause> &clauses, mlir::Location loc,
                        mlir::omp::OrderedRegionOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processTODO<clause::Simd>(loc, llvm::omp::Directive::OMPD_ordered);
}
|
|
|
|
/// Translate the clauses attached to a PARALLEL construct into MLIR operands.
/// NUM_THREADS is skipped when a host_eval entry already supplies its value
/// (target regions compiled for the host device).
static void genParallelClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::StatementContext &stmtCtx, const List<Clause> &clauses,
    mlir::Location loc, mlir::omp::ParallelOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processAllocate(clauseOps);
  cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps);

  if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps))
    cp.processNumThreads(stmtCtx, clauseOps);

  cp.processProcBind(clauseOps);
  cp.processReduction(loc, clauseOps, reductionSyms);
}
|
|
|
|
/// Translate the clauses attached to a SCAN directive (INCLUSIVE/EXCLUSIVE)
/// into MLIR operands.
static void genScanClauses(lower::AbstractConverter &converter,
                           semantics::SemanticsContext &semaCtx,
                           const List<Clause> &clauses, mlir::Location loc,
                           mlir::omp::ScanOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processInclusive(loc, clauseOps);
  cp.processExclusive(loc, clauseOps);
}
|
|
|
|
/// Translate the clauses attached to a SECTIONS construct (ALLOCATE, NOWAIT,
/// REDUCTION) into MLIR operands.
static void genSectionsClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    const List<Clause> &clauses, mlir::Location loc,
    mlir::omp::SectionsOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processAllocate(clauseOps);
  cp.processNowait(clauseOps);
  cp.processReduction(loc, clauseOps, reductionSyms);
  // TODO Support delayed privatization.
}
|
|
|
|
/// Translate the clauses attached to a SIMD construct into MLIR operands.
/// LINEAR is not supported yet and triggers a TODO error.
static void genSimdClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    const List<Clause> &clauses, mlir::Location loc,
    mlir::omp::SimdOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processAligned(clauseOps);
  cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps);
  cp.processNontemporal(clauseOps);
  cp.processOrder(clauseOps);
  cp.processReduction(loc, clauseOps, reductionSyms);
  cp.processSafelen(clauseOps);
  cp.processSimdlen(clauseOps);

  cp.processTODO<clause::Linear>(loc, llvm::omp::Directive::OMPD_simd);
}
|
|
|
|
/// Translate the clauses attached to a SINGLE construct (ALLOCATE,
/// COPYPRIVATE, NOWAIT) into MLIR operands.
static void genSingleClauses(lower::AbstractConverter &converter,
                             semantics::SemanticsContext &semaCtx,
                             const List<Clause> &clauses, mlir::Location loc,
                             mlir::omp::SingleOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processAllocate(clauseOps);
  cp.processCopyprivate(loc, clauseOps);
  cp.processNowait(clauseOps);
  // TODO Support delayed privatization.
}
|
|
|
|
/// Translate the clauses attached to a TARGET construct into MLIR operands,
/// also collecting the symbols referenced by HAS_DEVICE_ADDR, IS_DEVICE_PTR
/// and MAP clauses so implicit mapping can later skip them.
static void genTargetClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::SymMap &symTable, lower::StatementContext &stmtCtx,
    lower::pft::Evaluation &eval, const List<Clause> &clauses,
    mlir::Location loc, mlir::omp::TargetOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &hasDeviceAddrSyms,
    llvm::SmallVectorImpl<const semantics::Symbol *> &isDevicePtrSyms,
    llvm::SmallVectorImpl<const semantics::Symbol *> &mapSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processBare(clauseOps);
  cp.processDepend(symTable, stmtCtx, clauseOps);
  cp.processDevice(stmtCtx, clauseOps);
  cp.processHasDeviceAddr(stmtCtx, clauseOps, hasDeviceAddrSyms);
  if (!hostEvalInfo.empty()) {
    // Only process host_eval if compiling for the host device.
    processHostEvalClauses(converter, semaCtx, stmtCtx, eval, loc);
    hostEvalInfo.back().collectValues(clauseOps.hostEvalVars);
  }
  cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps);
  cp.processIsDevicePtr(clauseOps, isDevicePtrSyms);
  cp.processMap(loc, stmtCtx, clauseOps, &mapSyms);
  cp.processNowait(clauseOps);
  cp.processThreadLimit(stmtCtx, clauseOps);

  // Clauses not yet supported on TARGET emit a TODO error.
  cp.processTODO<clause::Allocate, clause::Defaultmap, clause::InReduction,
                 clause::UsesAllocators>(loc,
                                         llvm::omp::Directive::OMPD_target);

  // `target private(..)` is only supported in delayed privatization mode.
  if (!enableDelayedPrivatizationStaging)
    cp.processTODO<clause::Firstprivate, clause::Private>(
        loc, llvm::omp::Directive::OMPD_target);
}
|
|
|
|
/// Translate the clauses attached to a TARGET DATA construct into MLIR
/// operands, collecting the symbols used in USE_DEVICE_ADDR/USE_DEVICE_PTR.
static void genTargetDataClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::StatementContext &stmtCtx, const List<Clause> &clauses,
    mlir::Location loc, mlir::omp::TargetDataOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceAddrSyms,
    llvm::SmallVectorImpl<const semantics::Symbol *> &useDevicePtrSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processDevice(stmtCtx, clauseOps);
  cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps);
  cp.processMap(loc, stmtCtx, clauseOps);
  cp.processUseDeviceAddr(stmtCtx, clauseOps, useDeviceAddrSyms);
  cp.processUseDevicePtr(stmtCtx, clauseOps, useDevicePtrSyms);

  // This function implements the deprecated functionality of use_device_ptr
  // that allows users to provide non-CPTR arguments to it with the caveat
  // that the compiler will treat them as use_device_addr. A lot of legacy
  // code may still depend on this functionality, so we should support it
  // in some manner. We do so currently by simply shifting non-cptr operands
  // from the use_device_ptr lists into the use_device_addr lists.
  // TODO: Perhaps create a user provideable compiler option that will
  // re-introduce a hard-error rather than a warning in these cases.
  promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
      clauseOps.useDeviceAddrVars, useDeviceAddrSyms,
      clauseOps.useDevicePtrVars, useDevicePtrSyms);
}
|
|
|
|
/// Translate the clauses shared by TARGET ENTER DATA, TARGET EXIT DATA and
/// TARGET UPDATE into MLIR operands. TARGET UPDATE uses motion clauses
/// (TO/FROM) instead of MAP; \p directive selects which set is processed.
static void genTargetEnterExitUpdateDataClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::SymMap &symTable, lower::StatementContext &stmtCtx,
    const List<Clause> &clauses, mlir::Location loc,
    llvm::omp::Directive directive,
    mlir::omp::TargetEnterExitUpdateDataOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processDepend(symTable, stmtCtx, clauseOps);
  cp.processDevice(stmtCtx, clauseOps);
  cp.processIf(directive, clauseOps);

  if (directive == llvm::omp::Directive::OMPD_target_update)
    cp.processMotionClauses(stmtCtx, clauseOps);
  else
    cp.processMap(loc, stmtCtx, clauseOps);

  cp.processNowait(clauseOps);
}
|
|
|
|
/// Translate the clauses attached to a TASK construct into MLIR operands.
/// AFFINITY is not supported yet and triggers a TODO error.
static void genTaskClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::SymMap &symTable, lower::StatementContext &stmtCtx,
    const List<Clause> &clauses, mlir::Location loc,
    mlir::omp::TaskOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &inReductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processAllocate(clauseOps);
  cp.processDepend(symTable, stmtCtx, clauseOps);
  cp.processFinal(stmtCtx, clauseOps);
  cp.processIf(llvm::omp::Directive::OMPD_task, clauseOps);
  cp.processInReduction(loc, clauseOps, inReductionSyms);
  cp.processMergeable(clauseOps);
  cp.processPriority(stmtCtx, clauseOps);
  cp.processUntied(clauseOps);
  cp.processDetach(clauseOps);

  cp.processTODO<clause::Affinity>(loc, llvm::omp::Directive::OMPD_task);
}
|
|
|
|
/// Translate the clauses attached to a TASKGROUP construct (ALLOCATE,
/// TASK_REDUCTION) into MLIR operands.
static void genTaskgroupClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    const List<Clause> &clauses, mlir::Location loc,
    mlir::omp::TaskgroupOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &taskReductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processAllocate(clauseOps);
  cp.processTaskReduction(loc, clauseOps, taskReductionSyms);
}
|
|
|
|
/// Translate the clauses attached to a TASKLOOP construct. No clauses are
/// supported yet; all of them trigger a TODO error.
static void genTaskloopClauses(lower::AbstractConverter &converter,
                               semantics::SemanticsContext &semaCtx,
                               const List<Clause> &clauses, mlir::Location loc,
                               mlir::omp::TaskloopOperands &clauseOps) {

  ClauseProcessor cp(converter, semaCtx, clauses);

  cp.processTODO<clause::Allocate, clause::Collapse, clause::Default,
                 clause::Final, clause::Grainsize, clause::If,
                 clause::InReduction, clause::Lastprivate, clause::Mergeable,
                 clause::Nogroup, clause::NumTasks, clause::Priority,
                 clause::Reduction, clause::Shared, clause::Untied>(
      loc, llvm::omp::Directive::OMPD_taskloop);
}
|
|
|
|
/// Translate the clauses attached to a TASKWAIT construct. DEPEND and NOWAIT
/// are not supported yet and trigger a TODO error.
static void genTaskwaitClauses(lower::AbstractConverter &converter,
                               semantics::SemanticsContext &semaCtx,
                               const List<Clause> &clauses, mlir::Location loc,
                               mlir::omp::TaskwaitOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processTODO<clause::Depend, clause::Nowait>(
      loc, llvm::omp::Directive::OMPD_taskwait);
}
|
|
|
|
/// Translate the clauses attached to a WORKSHARE construct (only NOWAIT is
/// processed). \p stmtCtx and \p loc are currently unused but kept for
/// signature consistency with the other clause generators.
static void genWorkshareClauses(lower::AbstractConverter &converter,
                                semantics::SemanticsContext &semaCtx,
                                lower::StatementContext &stmtCtx,
                                const List<Clause> &clauses, mlir::Location loc,
                                mlir::omp::WorkshareOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processNowait(clauseOps);
}
|
|
|
|
/// Translate the clauses attached to a TEAMS construct into MLIR operands.
/// NUM_TEAMS and THREAD_LIMIT are skipped when a host_eval entry already
/// supplies their values (target regions compiled for the host device).
static void genTeamsClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::StatementContext &stmtCtx, const List<Clause> &clauses,
    mlir::Location loc, mlir::omp::TeamsOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processAllocate(clauseOps);
  cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps);

  if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps)) {
    cp.processNumTeams(stmtCtx, clauseOps);
    cp.processThreadLimit(stmtCtx, clauseOps);
  }

  cp.processReduction(loc, clauseOps, reductionSyms);
  // TODO Support delayed privatization.
}
|
|
|
|
/// Translate the clauses attached to a worksharing DO construct into MLIR
/// operands. ALLOCATE and LINEAR are not supported yet.
static void genWsloopClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::StatementContext &stmtCtx, const List<Clause> &clauses,
    mlir::Location loc, mlir::omp::WsloopOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processNowait(clauseOps);
  cp.processOrder(clauseOps);
  cp.processOrdered(clauseOps);
  cp.processReduction(loc, clauseOps, reductionSyms);
  cp.processSchedule(stmtCtx, clauseOps);

  cp.processTODO<clause::Allocate, clause::Linear>(
      loc, llvm::omp::Directive::OMPD_do);
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation functions for leaf constructs
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Generate an `omp.barrier` operation for a BARRIER construct. The construct
/// carries no clauses, so the operation is created directly.
static mlir::omp::BarrierOp
genBarrierOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
             semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
             mlir::Location loc, const ConstructQueue &queue,
             ConstructQueue::const_iterator item) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  return builder.create<mlir::omp::BarrierOp>(loc);
}
|
|
|
|
/// Generate an `omp.cancel` operation for a CANCEL construct after lowering
/// its clauses (cancelled directive kind and optional IF condition).
static mlir::omp::CancelOp genCancelOp(lower::AbstractConverter &converter,
                                       semantics::SemanticsContext &semaCtx,
                                       lower::pft::Evaluation &eval,
                                       mlir::Location loc,
                                       const ConstructQueue &queue,
                                       ConstructQueue::const_iterator item) {
  mlir::omp::CancelOperands clauseOps;
  genCancelClauses(converter, semaCtx, item->clauses, loc, clauseOps);

  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  return builder.create<mlir::omp::CancelOp>(loc, clauseOps);
}
|
|
|
|
/// Generate an `omp.cancellation_point` operation for a CANCELLATION POINT
/// construct after lowering its clauses.
static mlir::omp::CancellationPointOp genCancellationPointOp(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  mlir::omp::CancellationPointOperands clauseOps;
  genCancellationPointClauses(converter, semaCtx, item->clauses, loc,
                              clauseOps);

  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  return builder.create<mlir::omp::CancellationPointOp>(loc, clauseOps);
}
|
|
|
|
/// Generate an `omp.critical` operation for a CRITICAL construct. A named
/// critical section is backed by a module-level `omp.critical.declare`
/// (holding the HINT clause) that is created on first use and referenced by
/// symbol; an unnamed construct gets no symbol reference.
static mlir::omp::CriticalOp
genCriticalOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
              semantics::SemanticsContext &semaCtx,
              lower::pft::Evaluation &eval, mlir::Location loc,
              const ConstructQueue &queue, ConstructQueue::const_iterator item,
              const std::optional<parser::Name> &name) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  mlir::FlatSymbolRefAttr nameAttr;

  if (name) {
    std::string nameStr = name->ToString();
    mlir::ModuleOp mod = firOpBuilder.getModule();
    auto global = mod.lookupSymbol<mlir::omp::CriticalDeclareOp>(nameStr);
    if (!global) {
      // First reference to this named critical section: emit the declaration
      // at module scope.
      mlir::omp::CriticalDeclareOperands clauseOps;
      genCriticalDeclareClauses(converter, semaCtx, item->clauses, loc,
                                clauseOps, nameStr);

      mlir::OpBuilder modBuilder(mod.getBodyRegion());
      global = modBuilder.create<mlir::omp::CriticalDeclareOp>(loc, clauseOps);
    }
    nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(),
                                            global.getSymName());
  }

  return genOpWithBody<mlir::omp::CriticalOp>(
      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                        llvm::omp::Directive::OMPD_critical),
      queue, item, nameAttr);
}
|
|
|
|
/// Generate an `omp.flush` operation for a FLUSH construct, lowering the
/// flushed object list (if any) into the operation's operands.
static mlir::omp::FlushOp
genFlushOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
           semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
           mlir::Location loc, const ObjectList &objects,
           const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  llvm::SmallVector<mlir::Value> operandRange;
  genFlushClauses(converter, semaCtx, objects, item->clauses, loc,
                  operandRange);

  // Use the construct's location (`loc`), as every other leaf-construct
  // generator does, instead of converter.getCurrentLocation(); previously the
  // `loc` parameter was computed by the caller but ignored here.
  return converter.getFirOpBuilder().create<mlir::omp::FlushOp>(loc,
                                                                operandRange);
}
|
|
|
|
/// Generate an `omp.loop_nest` operation nested inside the already-created
/// loop wrapper operations listed in \p wrapperArgs. The induction variables
/// \p iv are bound to the wrappers' entry-block arguments before the loop
/// body is lowered.
static mlir::omp::LoopNestOp genLoopNestOp(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    mlir::Location loc, const ConstructQueue &queue,
    ConstructQueue::const_iterator item, mlir::omp::LoopNestOperands &clauseOps,
    llvm::ArrayRef<const semantics::Symbol *> iv,
    llvm::ArrayRef<
        std::pair<mlir::omp::BlockArgOpenMPOpInterface, const EntryBlockArgs &>>
        wrapperArgs,
    llvm::omp::Directive directive, DataSharingProcessor &dsp) {
  // Map the induction variable symbols to block arguments when the region's
  // entry block is created.
  auto ivCallback = [&](mlir::Operation *op) {
    genLoopVars(op, converter, loc, iv, wrapperArgs);
    return llvm::SmallVector<const semantics::Symbol *>(iv);
  };

  // With COLLAPSE, the body to lower is the innermost collapsed loop's one.
  auto *nestedEval =
      getCollapsedLoopEval(eval, getCollapseValue(item->clauses));

  return genOpWithBody<mlir::omp::LoopNestOp>(
      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval,
                        directive)
          .setClauses(&item->clauses)
          .setDataSharingProcessor(&dsp)
          .setGenRegionEntryCb(ivCallback),
      queue, item, clauseOps);
}
|
|
|
|
/// Generate an `omp.loop` wrapper with a nested `omp.loop_nest` for a LOOP
/// construct: process clauses, run delayed privatization, and hook the
/// private/reduction symbols up to the wrapper's entry-block arguments.
static mlir::omp::LoopOp
genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
          semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
          mlir::Location loc, const ConstructQueue &queue,
          ConstructQueue::const_iterator item) {
  mlir::omp::LoopOperands loopClauseOps;
  llvm::SmallVector<const semantics::Symbol *> loopReductionSyms;
  genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps,
                 loopReductionSyms);

  // Delayed privatization populates the wrapper's private operands.
  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           /*useDelayedPrivatization=*/true, symTable);
  dsp.processStep1(&loopClauseOps);

  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> iv;
  genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
                     loopNestClauseOps, iv);

  // Entry-block arguments of the wrapper: privatized symbols first, then
  // reduction symbols, matching the operand lists.
  EntryBlockArgs loopArgs;
  loopArgs.priv.syms = dsp.getDelayedPrivSymbols();
  loopArgs.priv.vars = loopClauseOps.privateVars;
  loopArgs.reduction.syms = loopReductionSyms;
  loopArgs.reduction.vars = loopClauseOps.reductionVars;

  auto loopOp =
      genWrapperOp<mlir::omp::LoopOp>(converter, loc, loopClauseOps, loopArgs);
  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                loopNestClauseOps, iv, {{loopOp, loopArgs}},
                llvm::omp::Directive::OMPD_loop, dsp);
  return loopOp;
}
|
|
|
|
/// Generate an `omp.masked` operation (with body) for a MASKED construct.
static mlir::omp::MaskedOp
genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
            lower::StatementContext &stmtCtx,
            semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
            mlir::Location loc, const ConstructQueue &queue,
            ConstructQueue::const_iterator item) {
  // Lower the FILTER clause, if present.
  mlir::omp::MaskedOperands clauseOps;
  genMaskedClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps);

  OpWithBodyGenInfo genInfo(converter, symTable, semaCtx, loc, eval,
                            llvm::omp::Directive::OMPD_masked);
  return genOpWithBody<mlir::omp::MaskedOp>(genInfo, queue, item, clauseOps);
}
|
|
|
|
/// Generate an `omp.master` operation (with body) for a MASTER construct.
/// MASTER takes no clauses.
static mlir::omp::MasterOp
genMasterOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
            semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
            mlir::Location loc, const ConstructQueue &queue,
            ConstructQueue::const_iterator item) {
  OpWithBodyGenInfo genInfo(converter, symTable, semaCtx, loc, eval,
                            llvm::omp::Directive::OMPD_master);
  return genOpWithBody<mlir::omp::MasterOp>(genInfo, queue, item);
}
|
|
|
|
/// Lowering of the standalone ORDERED construct (depend form) is not
/// implemented yet; report a TODO error. The return statement after TODO is
/// unreachable but keeps the signature consistent with the other gen*Op
/// helpers.
static mlir::omp::OrderedOp
genOrderedOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
             semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
             mlir::Location loc, const ConstructQueue &queue,
             ConstructQueue::const_iterator item) {
  TODO(loc, "OMPD_ordered");
  return nullptr;
}
|
|
|
|
/// Generate an `omp.ordered.region` operation (with body) for a block-form
/// ORDERED construct.
static mlir::omp::OrderedRegionOp
genOrderedRegionOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval, mlir::Location loc,
                   const ConstructQueue &queue,
                   ConstructQueue::const_iterator item) {
  // Emits TODO errors for unsupported clauses (SIMD).
  mlir::omp::OrderedRegionOperands clauseOps;
  genOrderedRegionClauses(converter, semaCtx, item->clauses, loc, clauseOps);

  OpWithBodyGenInfo genInfo(converter, symTable, semaCtx, loc, eval,
                            llvm::omp::Directive::OMPD_ordered);
  return genOpWithBody<mlir::omp::OrderedRegionOp>(genInfo, queue, item,
                                                   clauseOps);
}
|
|
|
|
/// Generate an `omp.parallel` operation (with body) for a PARALLEL construct.
/// When \p isComposite is true, only the operation skeleton is created (the
/// body is filled later by the composite-construct lowering) and the
/// operation is marked composite.
static mlir::omp::ParallelOp
genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
              semantics::SemanticsContext &semaCtx,
              lower::pft::Evaluation &eval, mlir::Location loc,
              const ConstructQueue &queue, ConstructQueue::const_iterator item,
              mlir::omp::ParallelOperands &clauseOps,
              const EntryBlockArgs &args, DataSharingProcessor *dsp,
              bool isComposite = false) {
  // Delayed privatization requires a DataSharingProcessor to have been run.
  assert((!enableDelayedPrivatization || dsp) &&
         "expected valid DataSharingProcessor");

  OpWithBodyGenInfo genInfo =
      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                        llvm::omp::Directive::OMPD_parallel)
          .setClauses(&item->clauses)
          .setEntryBlockArgs(&args)
          .setGenSkeletonOnly(isComposite)
          .setDataSharingProcessor(dsp);

  auto parallelOp =
      genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item, clauseOps);
  parallelOp.setComposite(isComposite);
  return parallelOp;
}
|
|
|
|
/// Generate an `omp.scan` operation for a SCAN directive after lowering its
/// INCLUSIVE/EXCLUSIVE clauses.
static mlir::omp::ScanOp
genScanOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
          semantics::SemanticsContext &semaCtx, mlir::Location loc,
          const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  mlir::omp::ScanOperands clauseOps;
  genScanClauses(converter, semaCtx, item->clauses, loc, clauseOps);

  // Use the construct's location (`loc`), as every other leaf-construct
  // generator does, instead of converter.getCurrentLocation(); previously the
  // `loc` parameter was computed by the caller but ignored here.
  return converter.getFirOpBuilder().create<mlir::omp::ScanOp>(loc, clauseOps);
}
|
|
|
|
/// This breaks the normal prototype of the gen*Op functions: adding the
|
|
/// sectionBlocks argument so that the enclosed section constructs can be
|
|
/// lowered here with correct reduction symbol remapping.
|
|
static mlir::omp::SectionsOp
genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
              semantics::SemanticsContext &semaCtx,
              lower::pft::Evaluation &eval, mlir::Location loc,
              const ConstructQueue &queue, ConstructQueue::const_iterator item,
              const parser::OmpSectionBlocks &sectionBlocks) {
  mlir::omp::SectionsOperands clauseOps;
  llvm::SmallVector<const semantics::Symbol *> reductionSyms;
  genSectionsClauses(converter, semaCtx, item->clauses, loc, clauseOps,
                     reductionSyms);

  auto &builder = converter.getFirOpBuilder();

  // Insert privatizations before SECTIONS
  lower::SymMapScope scope(symTable);
  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                           lower::omp::isLastItemInQueue(item, queue),
                           /*useDelayedPrivatization=*/false, symTable);
  dsp.processStep1();

  // Separate the data-sharing clauses (handled by the DataSharingProcessor
  // above) from the rest; LASTPRIVATE is collected so the copy-out can be
  // emitted into the last SECTION below.
  List<Clause> nonDsaClauses;
  List<const clause::Lastprivate *> lastprivates;

  for (const Clause &clause : item->clauses) {
    if (clause.id == llvm::omp::Clause::OMPC_lastprivate) {
      auto &lastp = std::get<clause::Lastprivate>(clause.u);
      lastprivateModifierNotSupported(lastp, converter.getCurrentLocation());
      lastprivates.push_back(&lastp);
    } else {
      switch (clause.id) {
      case llvm::omp::Clause::OMPC_firstprivate:
      case llvm::omp::Clause::OMPC_private:
      case llvm::omp::Clause::OMPC_shared:
        break;
      default:
        nonDsaClauses.push_back(clause);
      }
    }
  }

  // SECTIONS construct.
  auto sectionsOp = builder.create<mlir::omp::SectionsOp>(loc, clauseOps);

  // Create entry block with reduction variables as arguments.
  EntryBlockArgs args;
  // TODO: Add private syms and vars.
  args.reduction.syms = reductionSyms;
  args.reduction.vars = clauseOps.reductionVars;

  genEntryBlock(builder, args, sectionsOp.getRegion());
  mlir::Operation *terminator =
      lower::genOpenMPTerminator(builder, sectionsOp, loc);

  // Generate nested SECTION constructs.
  // This is done here rather than in genOMP([...], OpenMPSectionConstruct )
  // because we need to run genReductionVars on each omp.section so that the
  // reduction variable gets mapped to the private version
  for (auto [construct, nestedEval] :
       llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
    const auto *sectionConstruct =
        std::get_if<parser::OpenMPSectionConstruct>(&construct.u);
    if (!sectionConstruct) {
      assert(false &&
             "unexpected construct nested inside of SECTIONS construct");
      continue;
    }

    ConstructQueue sectionQueue{buildConstructQueue(
        converter.getFirOpBuilder().getModule(), semaCtx, nestedEval,
        sectionConstruct->source, llvm::omp::Directive::OMPD_section, {})};

    builder.setInsertionPoint(terminator);
    genOpWithBody<mlir::omp::SectionOp>(
        OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
                          llvm::omp::Directive::OMPD_section)
            .setClauses(&sectionQueue.begin()->clauses)
            .setEntryBlockArgs(&args),
        sectionQueue, sectionQueue.begin());
  }

  // The lastprivate copy-out must run in the section that executes last, so
  // it is inserted just before the terminator of the final omp.section.
  if (!lastprivates.empty()) {
    mlir::Region &sectionsBody = sectionsOp.getRegion();
    assert(sectionsBody.hasOneBlock());
    mlir::Block &body = sectionsBody.front();

    auto lastSectionOp = llvm::find_if(
        llvm::reverse(body.getOperations()), [](const mlir::Operation &op) {
          return llvm::isa<mlir::omp::SectionOp>(op);
        });
    assert(lastSectionOp != body.rend());

    for (const clause::Lastprivate *lastp : lastprivates) {
      builder.setInsertionPoint(
          lastSectionOp->getRegion(0).back().getTerminator());
      mlir::OpBuilder::InsertPoint insp = builder.saveInsertionPoint();
      const auto &objList = std::get<ObjectList>(lastp->t);
      for (const Object &object : objList) {
        semantics::Symbol *sym = object.sym();
        // Common blocks are copied member by member.
        if (const auto *common =
                sym->detailsIf<semantics::CommonBlockDetails>()) {
          for (const auto &obj : common->objects())
            converter.copyHostAssociateVar(*obj, &insp, /*hostIsSource=*/false);
        } else {
          converter.copyHostAssociateVar(*sym, &insp, /*hostIsSource=*/false);
        }
      }
    }
  }

  // Perform DataSharingProcessor's step2 out of SECTIONS
  builder.setInsertionPointAfter(sectionsOp.getOperation());
  dsp.processStep2(sectionsOp, false);
  // Emit implicit barrier to synchronize threads and avoid data
  // races on post-update of lastprivate variables when `nowait`
  // clause is present.
  if (clauseOps.nowait && !lastprivates.empty())
    builder.create<mlir::omp::BarrierOp>(loc);

  return sectionsOp;
}
|
|
|
|
/// Lowering of the SCOPE construct is not implemented yet; report a TODO
/// error. The return statement after TODO is unreachable but keeps the
/// signature consistent with the other gen*Op helpers.
static mlir::Operation *
genScopeOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
           semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
           mlir::Location loc, const ConstructQueue &queue,
           ConstructQueue::const_iterator item) {
  TODO(loc, "Scope construct");
  return nullptr;
}
|
|
|
|
/// Generate an `omp.single` operation (with body) for a SINGLE construct.
static mlir::omp::SingleOp
genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
            semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
            mlir::Location loc, const ConstructQueue &queue,
            ConstructQueue::const_iterator item) {
  // Lower ALLOCATE/COPYPRIVATE/NOWAIT clauses.
  mlir::omp::SingleOperands clauseOps;
  genSingleClauses(converter, semaCtx, item->clauses, loc, clauseOps);

  OpWithBodyGenInfo genInfo =
      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                        llvm::omp::Directive::OMPD_single)
          .setClauses(&item->clauses);
  return genOpWithBody<mlir::omp::SingleOp>(genInfo, queue, item, clauseOps);
}
|
|
|
|
static mlir::omp::TargetOp
|
|
genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
|
|
lower::StatementContext &stmtCtx,
|
|
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
|
|
mlir::Location loc, const ConstructQueue &queue,
|
|
ConstructQueue::const_iterator item) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
bool isTargetDevice =
|
|
llvm::cast<mlir::omp::OffloadModuleInterface>(*converter.getModuleOp())
|
|
.getIsTargetDevice();
|
|
|
|
// Introduce a new host_eval information structure for this target region.
|
|
if (!isTargetDevice)
|
|
hostEvalInfo.emplace_back();
|
|
|
|
mlir::omp::TargetOperands clauseOps;
|
|
llvm::SmallVector<const semantics::Symbol *> mapSyms, isDevicePtrSyms,
|
|
hasDeviceAddrSyms;
|
|
genTargetClauses(converter, semaCtx, symTable, stmtCtx, eval, item->clauses,
|
|
loc, clauseOps, hasDeviceAddrSyms, isDevicePtrSyms, mapSyms);
|
|
|
|
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
|
|
/*shouldCollectPreDeterminedSymbols=*/
|
|
lower::omp::isLastItemInQueue(item, queue),
|
|
/*useDelayedPrivatization=*/true, symTable);
|
|
dsp.processStep1(&clauseOps);
|
|
|
|
// Check if a value of type `type` can be passed to the kernel by value.
|
|
// All kernel parameters are of pointer type, so if the value can be
|
|
// represented inside of a pointer, then it can be passed by value.
|
|
auto isLiteralType = [&](mlir::Type type) {
|
|
const mlir::DataLayout &dl = firOpBuilder.getDataLayout();
|
|
mlir::Type ptrTy =
|
|
mlir::LLVM::LLVMPointerType::get(&converter.getMLIRContext());
|
|
uint64_t ptrSize = dl.getTypeSize(ptrTy);
|
|
uint64_t ptrAlign = dl.getTypePreferredAlignment(ptrTy);
|
|
|
|
auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash(
|
|
loc, type, dl, converter.getKindMap());
|
|
return size <= ptrSize && align <= ptrAlign;
|
|
};
|
|
|
|
// 5.8.1 Implicit Data-Mapping Attribute Rules
|
|
// The following code follows the implicit data-mapping rules to map all the
|
|
// symbols used inside the region that do not have explicit data-environment
|
|
// attribute clauses (neither data-sharing; e.g. `private`, nor `map`
|
|
// clauses).
|
|
auto captureImplicitMap = [&](const semantics::Symbol &sym) {
|
|
if (dsp.getAllSymbolsToPrivatize().contains(&sym))
|
|
return;
|
|
|
|
// These symbols are mapped individually in processHasDeviceAddr.
|
|
if (llvm::is_contained(hasDeviceAddrSyms, &sym))
|
|
return;
|
|
|
|
// Structure component symbols don't have bindings, and can only be
|
|
// explicitly mapped individually. If a member is captured implicitly
|
|
// we map the entirety of the derived type when we find its symbol.
|
|
if (sym.owner().IsDerivedType())
|
|
return;
|
|
|
|
// if the symbol is part of an already mapped common block, do not make a
|
|
// map for it.
|
|
if (const Fortran::semantics::Symbol *common =
|
|
Fortran::semantics::FindCommonBlockContaining(sym.GetUltimate()))
|
|
if (llvm::is_contained(mapSyms, common))
|
|
return;
|
|
|
|
// If we come across a symbol without a symbol address, we
|
|
// return as we cannot process it, this is intended as a
|
|
// catch all early exit for symbols that do not have a
|
|
// corresponding extended value. Such as subroutines,
|
|
// interfaces and named blocks.
|
|
if (!converter.getSymbolAddress(sym))
|
|
return;
|
|
|
|
if (!llvm::is_contained(mapSyms, &sym)) {
|
|
if (const auto *details =
|
|
sym.template detailsIf<semantics::HostAssocDetails>())
|
|
converter.copySymbolBinding(details->symbol(), sym);
|
|
std::stringstream name;
|
|
fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym);
|
|
name << sym.name().ToString();
|
|
|
|
mlir::FlatSymbolRefAttr mapperId;
|
|
if (sym.GetType()->category() == semantics::DeclTypeSpec::TypeDerived) {
|
|
auto &typeSpec = sym.GetType()->derivedTypeSpec();
|
|
std::string mapperIdName = typeSpec.name().ToString() + ".default";
|
|
mapperIdName = converter.mangleName(mapperIdName, *typeSpec.GetScope());
|
|
if (converter.getModuleOp().lookupSymbol(mapperIdName))
|
|
mapperId = mlir::FlatSymbolRefAttr::get(&converter.getMLIRContext(),
|
|
mapperIdName);
|
|
}
|
|
|
|
fir::factory::AddrAndBoundsInfo info =
|
|
Fortran::lower::getDataOperandBaseAddr(
|
|
converter, firOpBuilder, sym, converter.getCurrentLocation());
|
|
llvm::SmallVector<mlir::Value> bounds =
|
|
fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
|
|
mlir::omp::MapBoundsType>(
|
|
firOpBuilder, info, dataExv,
|
|
semantics::IsAssumedSizeArray(sym.GetUltimate()),
|
|
converter.getCurrentLocation());
|
|
|
|
llvm::omp::OpenMPOffloadMappingFlags mapFlag =
|
|
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
|
|
mlir::omp::VariableCaptureKind captureKind =
|
|
mlir::omp::VariableCaptureKind::ByRef;
|
|
|
|
mlir::Value baseOp = info.rawInput;
|
|
mlir::Type eleType = baseOp.getType();
|
|
if (auto refType = mlir::dyn_cast<fir::ReferenceType>(baseOp.getType()))
|
|
eleType = refType.getElementType();
|
|
|
|
// If a variable is specified in declare target link and if device
|
|
// type is not specified as `nohost`, it needs to be mapped tofrom
|
|
mlir::ModuleOp mod = firOpBuilder.getModule();
|
|
mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym));
|
|
auto declareTargetOp =
|
|
llvm::dyn_cast_if_present<mlir::omp::DeclareTargetInterface>(op);
|
|
if (declareTargetOp && declareTargetOp.isDeclareTarget()) {
|
|
if (declareTargetOp.getDeclareTargetCaptureClause() ==
|
|
mlir::omp::DeclareTargetCaptureClause::link &&
|
|
declareTargetOp.getDeclareTargetDeviceType() !=
|
|
mlir::omp::DeclareTargetDeviceType::nohost) {
|
|
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
|
|
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
|
|
}
|
|
} else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
|
|
// Scalars behave as if they were "firstprivate".
|
|
// TODO: Handle objects that are shared/lastprivate or were listed
|
|
// in an in_reduction clause.
|
|
if (isLiteralType(eleType)) {
|
|
captureKind = mlir::omp::VariableCaptureKind::ByCopy;
|
|
} else {
|
|
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
|
|
}
|
|
} else if (!fir::isa_builtin_cptr_type(eleType)) {
|
|
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
|
|
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
|
|
}
|
|
auto location =
|
|
mlir::NameLoc::get(mlir::StringAttr::get(firOpBuilder.getContext(),
|
|
sym.name().ToString()),
|
|
baseOp.getLoc());
|
|
mlir::Value mapOp = createMapInfoOp(
|
|
firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{},
|
|
name.str(), bounds, /*members=*/{},
|
|
/*membersIndex=*/mlir::ArrayAttr{},
|
|
static_cast<
|
|
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
|
|
mapFlag),
|
|
captureKind, baseOp.getType(), /*partialMap=*/false, mapperId);
|
|
|
|
clauseOps.mapVars.push_back(mapOp);
|
|
mapSyms.push_back(&sym);
|
|
}
|
|
};
|
|
lower::pft::visitAllSymbols(eval, captureImplicitMap);
|
|
|
|
auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
|
|
|
|
llvm::SmallVector<mlir::Value> hasDeviceAddrBaseValues, mapBaseValues;
|
|
extractMappedBaseValues(clauseOps.hasDeviceAddrVars, hasDeviceAddrBaseValues);
|
|
extractMappedBaseValues(clauseOps.mapVars, mapBaseValues);
|
|
|
|
EntryBlockArgs args;
|
|
args.hasDeviceAddr.syms = hasDeviceAddrSyms;
|
|
args.hasDeviceAddr.vars = hasDeviceAddrBaseValues;
|
|
args.hostEvalVars = clauseOps.hostEvalVars;
|
|
// TODO: Add in_reduction syms and vars.
|
|
args.map.syms = mapSyms;
|
|
args.map.vars = mapBaseValues;
|
|
args.priv.syms = dsp.getDelayedPrivSymbols();
|
|
args.priv.vars = clauseOps.privateVars;
|
|
|
|
genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, args, loc,
|
|
queue, item, dsp);
|
|
|
|
// Remove the host_eval information structure created for this target region.
|
|
if (!isTargetDevice)
|
|
hostEvalInfo.pop_back();
|
|
return targetOp;
|
|
}
|
|
|
|
static mlir::omp::TargetDataOp genTargetDataOp(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  // Lower the clauses of the TARGET DATA construct.
  mlir::omp::TargetDataOperands clauseOps;
  llvm::SmallVector<const semantics::Symbol *> useDeviceAddrSyms;
  llvm::SmallVector<const semantics::Symbol *> useDevicePtrSyms;
  genTargetDataClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
                       clauseOps, useDeviceAddrSyms, useDevicePtrSyms);

  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  auto targetDataOp = builder.create<mlir::omp::TargetDataOp>(loc, clauseOps);

  // Recover the base values of the use_device_addr/use_device_ptr variables;
  // these become the region's entry block argument bindings.
  llvm::SmallVector<mlir::Value> useDeviceAddrBaseValues;
  llvm::SmallVector<mlir::Value> useDevicePtrBaseValues;
  extractMappedBaseValues(clauseOps.useDeviceAddrVars, useDeviceAddrBaseValues);
  extractMappedBaseValues(clauseOps.useDevicePtrVars, useDevicePtrBaseValues);

  EntryBlockArgs args;
  args.useDeviceAddr.syms = useDeviceAddrSyms;
  args.useDeviceAddr.vars = useDeviceAddrBaseValues;
  args.useDevicePtr.syms = useDevicePtrSyms;
  args.useDevicePtr.vars = useDevicePtrBaseValues;

  // Populate the region body, binding symbols to the entry block arguments.
  genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp, args,
                        loc, queue, item);
  return targetDataOp;
}
|
|
|
|
template <typename OpTy>
|
|
static OpTy genTargetEnterExitUpdateDataOp(
|
|
lower::AbstractConverter &converter, lower::SymMap &symTable,
|
|
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
|
|
mlir::Location loc, const ConstructQueue &queue,
|
|
ConstructQueue::const_iterator item) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
|
|
// GCC 9.3.0 emits a (probably) bogus warning about an unused variable.
|
|
[[maybe_unused]] llvm::omp::Directive directive;
|
|
if constexpr (std::is_same_v<OpTy, mlir::omp::TargetEnterDataOp>) {
|
|
directive = llvm::omp::Directive::OMPD_target_enter_data;
|
|
} else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetExitDataOp>) {
|
|
directive = llvm::omp::Directive::OMPD_target_exit_data;
|
|
} else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetUpdateOp>) {
|
|
directive = llvm::omp::Directive::OMPD_target_update;
|
|
} else {
|
|
llvm_unreachable("Unexpected TARGET DATA construct");
|
|
}
|
|
|
|
mlir::omp::TargetEnterExitUpdateDataOperands clauseOps;
|
|
genTargetEnterExitUpdateDataClauses(converter, semaCtx, symTable, stmtCtx,
|
|
item->clauses, loc, directive, clauseOps);
|
|
|
|
return firOpBuilder.create<OpTy>(loc, clauseOps);
|
|
}
|
|
|
|
static mlir::omp::TaskOp
genTaskOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
          lower::StatementContext &stmtCtx,
          semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
          mlir::Location loc, const ConstructQueue &queue,
          ConstructQueue::const_iterator item) {
  // Lower the clauses of the TASK construct.
  mlir::omp::TaskOperands clauseOps;
  llvm::SmallVector<const semantics::Symbol *> inReductionSyms;
  genTaskClauses(converter, semaCtx, symTable, stmtCtx, item->clauses, loc,
                 clauseOps, inReductionSyms);

  // Without delayed privatization, data-sharing is handled while lowering the
  // task body, so the op can be created without privatization operands.
  if (!enableDelayedPrivatization)
    return genOpWithBody<mlir::omp::TaskOp>(
        OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                          llvm::omp::Directive::OMPD_task)
            .setClauses(&item->clauses),
        queue, item, clauseOps);

  // Delayed privatization: step 1 populates the private clause operands
  // before the op is created.
  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                           lower::omp::isLastItemInQueue(item, queue),
                           /*useDelayedPrivatization=*/true, symTable);
  dsp.processStep1(&clauseOps);

  // Private and in_reduction symbols/values become entry block arguments of
  // the task region.
  EntryBlockArgs taskArgs;
  taskArgs.priv.syms = dsp.getDelayedPrivSymbols();
  taskArgs.priv.vars = clauseOps.privateVars;
  taskArgs.inReduction.syms = inReductionSyms;
  taskArgs.inReduction.vars = clauseOps.inReductionVars;

  OpWithBodyGenInfo genInfo(converter, symTable, semaCtx, loc, eval,
                            llvm::omp::Directive::OMPD_task);
  genInfo.setClauses(&item->clauses);
  genInfo.setDataSharingProcessor(&dsp);
  genInfo.setEntryBlockArgs(&taskArgs);
  return genOpWithBody<mlir::omp::TaskOp>(genInfo, queue, item, clauseOps);
}
|
|
|
|
static mlir::omp::TaskgroupOp
genTaskgroupOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
               semantics::SemanticsContext &semaCtx,
               lower::pft::Evaluation &eval, mlir::Location loc,
               const ConstructQueue &queue,
               ConstructQueue::const_iterator item) {
  // Lower the clauses of the TASKGROUP construct.
  mlir::omp::TaskgroupOperands clauseOps;
  llvm::SmallVector<const semantics::Symbol *> taskReductionSyms;
  genTaskgroupClauses(converter, semaCtx, item->clauses, loc, clauseOps,
                      taskReductionSyms);

  // task_reduction symbols/values become entry block arguments of the
  // taskgroup region.
  EntryBlockArgs taskgroupArgs;
  taskgroupArgs.taskReduction.syms = taskReductionSyms;
  taskgroupArgs.taskReduction.vars = clauseOps.taskReductionVars;

  OpWithBodyGenInfo genInfo(converter, symTable, semaCtx, loc, eval,
                            llvm::omp::Directive::OMPD_taskgroup);
  genInfo.setClauses(&item->clauses);
  genInfo.setEntryBlockArgs(&taskgroupArgs);
  return genOpWithBody<mlir::omp::TaskgroupOp>(genInfo, queue, item,
                                               clauseOps);
}
|
|
|
|
static mlir::omp::TaskwaitOp
genTaskwaitOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
              semantics::SemanticsContext &semaCtx,
              lower::pft::Evaluation &eval, mlir::Location loc,
              const ConstructQueue &queue,
              ConstructQueue::const_iterator item) {
  // TASKWAIT has no region: lower its clauses and emit the op directly.
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  mlir::omp::TaskwaitOperands clauseOps;
  genTaskwaitClauses(converter, semaCtx, item->clauses, loc, clauseOps);
  return builder.create<mlir::omp::TaskwaitOp>(loc, clauseOps);
}
|
|
|
|
static mlir::omp::TaskyieldOp
genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
               semantics::SemanticsContext &semaCtx,
               lower::pft::Evaluation &eval, mlir::Location loc,
               const ConstructQueue &queue,
               ConstructQueue::const_iterator item) {
  // TASKYIELD carries no operands: emit the bare op.
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  return builder.create<mlir::omp::TaskyieldOp>(loc);
}
|
|
|
|
static mlir::omp::WorkshareOp genWorkshareOp(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  // Lower the clauses of the WORKSHARE construct.
  mlir::omp::WorkshareOperands clauseOps;
  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
                      clauseOps);

  OpWithBodyGenInfo genInfo(converter, symTable, semaCtx, loc, eval,
                            llvm::omp::Directive::OMPD_workshare);
  genInfo.setClauses(&item->clauses);
  return genOpWithBody<mlir::omp::WorkshareOp>(genInfo, queue, item,
                                               clauseOps);
}
|
|
|
|
static mlir::omp::TeamsOp
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
           lower::StatementContext &stmtCtx,
           semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
           mlir::Location loc, const ConstructQueue &queue,
           ConstructQueue::const_iterator item) {
  // Lower the clauses of the TEAMS construct.
  mlir::omp::TeamsOperands clauseOps;
  llvm::SmallVector<const semantics::Symbol *> reductionSyms;
  genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps,
                  reductionSyms);

  // Reduction symbols/values become entry block arguments of the teams
  // region.
  // TODO: Add private syms and vars.
  EntryBlockArgs teamsArgs;
  teamsArgs.reduction.syms = reductionSyms;
  teamsArgs.reduction.vars = clauseOps.reductionVars;

  OpWithBodyGenInfo genInfo(converter, symTable, semaCtx, loc, eval,
                            llvm::omp::Directive::OMPD_teams);
  genInfo.setClauses(&item->clauses);
  genInfo.setEntryBlockArgs(&teamsArgs);
  return genOpWithBody<mlir::omp::TeamsOp>(genInfo, queue, item, clauseOps);
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation for atomic operations
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Populates \p hint and \p memoryOrder with appropriate clause information
/// if present on atomic construct.
///
/// \p hint is set from a HINT clause, \p memoryOrder from one of the
/// memory-order clauses (seq_cst, acq_rel, acquire, release, relaxed). Either
/// output is left untouched when no corresponding clause appears, so callers
/// may pre-initialize them to null and pass the same attributes through
/// multiple clause lists. FAIL clauses are ignored here.
static void genOmpAtomicHintAndMemoryOrderClauses(
    lower::AbstractConverter &converter,
    const parser::OmpAtomicClauseList &clauseList, mlir::IntegerAttr &hint,
    mlir::omp::ClauseMemoryOrderKindAttr &memoryOrder) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  for (const parser::OmpAtomicClause &clause : clauseList.v) {
    common::visit(
        common::visitors{
            [&](const parser::OmpMemoryOrderClause &s) {
              // Translate the parse-tree memory-order clause into the MLIR
              // dialect's enum; any other alternative is a parser bug.
              auto kind = common::visit(
                  common::visitors{
                      [&](const parser::OmpClause::AcqRel &) {
                        return mlir::omp::ClauseMemoryOrderKind::Acq_rel;
                      },
                      [&](const parser::OmpClause::Acquire &) {
                        return mlir::omp::ClauseMemoryOrderKind::Acquire;
                      },
                      [&](const parser::OmpClause::Relaxed &) {
                        return mlir::omp::ClauseMemoryOrderKind::Relaxed;
                      },
                      [&](const parser::OmpClause::Release &) {
                        return mlir::omp::ClauseMemoryOrderKind::Release;
                      },
                      [&](const parser::OmpClause::SeqCst &) {
                        return mlir::omp::ClauseMemoryOrderKind::Seq_cst;
                      },
                      [&](auto &&) -> mlir::omp::ClauseMemoryOrderKind {
                        llvm_unreachable("Unexpected clause");
                      },
                  },
                  s.v.u);
              memoryOrder = mlir::omp::ClauseMemoryOrderKindAttr::get(
                  firOpBuilder.getContext(), kind);
            },
            [&](const parser::OmpHintClause &s) {
              const auto *expr = semantics::GetExpr(s.v);
              // NOTE(review): the optional returned by ToInt64 is dereferenced
              // unchecked — presumably semantic analysis guarantees the hint
              // is a constant integer expression; confirm before relying on
              // this with unvalidated input.
              uint64_t hintExprValue = *evaluate::ToInt64(*expr);
              hint = firOpBuilder.getI64IntegerAttr(hintExprValue);
            },
            // FAIL only affects compare-and-swap forms; nothing to record.
            [&](const parser::OmpFailClause &) {},
        },
        clause.u);
  }
}
|
|
|
|
/// Check that \p elementType is a type currently supported by `omp atomic`
/// lowering. A null type is accepted and means there is nothing to check.
static void processOmpAtomicTODO(mlir::Type elementType, mlir::Location loc) {
  if (elementType) {
    // Only trivial scalar types are handled at the moment.
    assert(fir::isa_trivial(fir::unwrapRefType(elementType)) &&
           "is supported type for omp atomic");
  }
}
|
|
|
|
/// Used to generate atomic.read operation which is created in existing
|
|
/// location set by builder.
|
|
static void genAtomicCaptureStatement(
|
|
lower::AbstractConverter &converter, mlir::Value fromAddress,
|
|
mlir::Value toAddress,
|
|
const parser::OmpAtomicClauseList *leftHandClauseList,
|
|
const parser::OmpAtomicClauseList *rightHandClauseList,
|
|
mlir::Type elementType, mlir::Location loc) {
|
|
// Generate `atomic.read` operation for atomic assigment statements
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
|
|
processOmpAtomicTODO(elementType, loc);
|
|
|
|
// If no hint clause is specified, the effect is as if
|
|
// hint(omp_sync_hint_none) had been specified.
|
|
mlir::IntegerAttr hint = nullptr;
|
|
|
|
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
|
|
if (leftHandClauseList)
|
|
genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
|
|
memoryOrder);
|
|
if (rightHandClauseList)
|
|
genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
|
|
memoryOrder);
|
|
firOpBuilder.create<mlir::omp::AtomicReadOp>(loc, fromAddress, toAddress,
|
|
mlir::TypeAttr::get(elementType),
|
|
hint, memoryOrder);
|
|
}
|
|
|
|
/// Used to generate atomic.write operation which is created in existing
|
|
/// location set by builder.
|
|
static void genAtomicWriteStatement(
|
|
lower::AbstractConverter &converter, mlir::Value lhsAddr,
|
|
mlir::Value rhsExpr, const parser::OmpAtomicClauseList *leftHandClauseList,
|
|
const parser::OmpAtomicClauseList *rightHandClauseList, mlir::Location loc,
|
|
mlir::Value *evaluatedExprValue = nullptr) {
|
|
// Generate `atomic.write` operation for atomic assignment statements
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
|
|
mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
|
|
// Create a conversion outside the capture block.
|
|
auto insertionPoint = firOpBuilder.saveInsertionPoint();
|
|
firOpBuilder.setInsertionPointAfter(rhsExpr.getDefiningOp());
|
|
rhsExpr = firOpBuilder.createConvert(loc, varType, rhsExpr);
|
|
firOpBuilder.restoreInsertionPoint(insertionPoint);
|
|
|
|
processOmpAtomicTODO(varType, loc);
|
|
|
|
// If no hint clause is specified, the effect is as if
|
|
// hint(omp_sync_hint_none) had been specified.
|
|
mlir::IntegerAttr hint = nullptr;
|
|
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
|
|
if (leftHandClauseList)
|
|
genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
|
|
memoryOrder);
|
|
if (rightHandClauseList)
|
|
genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
|
|
memoryOrder);
|
|
firOpBuilder.create<mlir::omp::AtomicWriteOp>(loc, lhsAddr, rhsExpr, hint,
|
|
memoryOrder);
|
|
}
|
|
|
|
/// Used to generate atomic.update operation which is created in existing
/// location set by builder.
///
/// Lowers an update assignment (`x = x op expr`, or an intrinsic-call form
/// such as `x = max(x, ...)`) into an `omp.atomic.update` region whose block
/// argument stands for the old value of `x`. Sub-expressions that must not be
/// re-evaluated atomically are lowered before the atomic op — or before the
/// enclosing capture op when \p atomicCaptureOp is non-null — and their
/// lowered values are substituted through the converter's expression override
/// map while the region body is generated.
static void genAtomicUpdateStatement(
    lower::AbstractConverter &converter, mlir::Value lhsAddr,
    mlir::Type varType, const parser::Variable &assignmentStmtVariable,
    const parser::Expr &assignmentStmtExpr,
    const parser::OmpAtomicClauseList *leftHandClauseList,
    const parser::OmpAtomicClauseList *rightHandClauseList, mlir::Location loc,
    mlir::Operation *atomicCaptureOp = nullptr) {
  // Generate `atomic.update` operation for atomic assignment statements
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  mlir::Location currentLocation = converter.getCurrentLocation();

  // Create the omp.atomic.update or acc.atomic.update operation
  //
  // func.func @_QPsb() {
  //   %0 = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFsbEa"}
  //   %1 = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFsbEb"}
  //   %2 = fir.load %1 : !fir.ref<i32>
  //   omp.atomic.update   %0 : !fir.ref<i32> {
  //   ^bb0(%arg0: i32):
  //     %3 = arith.addi %arg0, %2 : i32
  //     omp.yield(%3 : i32)
  //   }
  //   return
  // }

  // Extract the parse-tree expression from an actual-argument iterator;
  // returns null for non-expression arguments (e.g. alternate returns).
  auto getArgExpression =
      [](std::list<parser::ActualArgSpec>::const_iterator it) {
        const auto &arg{std::get<parser::ActualArg>((*it).t)};
        const auto *parserExpr{
            std::get_if<common::Indirection<parser::Expr>>(&arg.u)};
        return parserExpr;
      };

  // Lower any non atomic sub-expression before the atomic operation, and
  // map its lowered value to the semantic representation.
  lower::ExprToValueMap exprValueOverrides;
  // Max and min intrinsics can have a list of Args. Hence we need a list
  // of nonAtomicSubExprs to hoist. Currently, only the load is hoisted.
  llvm::SmallVector<const lower::SomeExpr *> nonAtomicSubExprs;
  common::visit(
      common::visitors{
          // Intrinsic-call form, e.g. `x = max(x, a, b)`: every argument
          // except the occurrence of `x` itself is non-atomic.
          [&](const common::Indirection<parser::FunctionReference> &funcRef)
              -> void {
            const auto &args{std::get<std::list<parser::ActualArgSpec>>(
                funcRef.value().v.t)};
            std::list<parser::ActualArgSpec>::const_iterator beginIt =
                args.begin();
            std::list<parser::ActualArgSpec>::const_iterator endIt = args.end();
            const auto *exprFirst{getArgExpression(beginIt)};
            // The updated variable may appear first or last in the argument
            // list; skip whichever position matches its source text.
            if (exprFirst && exprFirst->value().source ==
                                 assignmentStmtVariable.GetSource()) {
              // Add everything except the first
              beginIt++;
            } else {
              // Add everything except the last
              endIt--;
            }
            std::list<parser::ActualArgSpec>::const_iterator it;
            for (it = beginIt; it != endIt; it++) {
              const common::Indirection<parser::Expr> *expr =
                  getArgExpression(it);
              if (expr)
                nonAtomicSubExprs.push_back(semantics::GetExpr(*expr));
            }
          },
          // Binary-operator form, e.g. `x = x + e` or `x = e + x`: the
          // operand that is not the updated variable is non-atomic.
          [&](const auto &op) -> void {
            using T = std::decay_t<decltype(op)>;
            if constexpr (std::is_base_of<parser::Expr::IntrinsicBinary,
                                          T>::value) {
              const auto &exprLeft{std::get<0>(op.t)};
              const auto &exprRight{std::get<1>(op.t)};
              if (exprLeft.value().source == assignmentStmtVariable.GetSource())
                nonAtomicSubExprs.push_back(semantics::GetExpr(exprRight));
              else
                nonAtomicSubExprs.push_back(semantics::GetExpr(exprLeft));
            }
          },
      },
      assignmentStmtExpr.u);
  lower::StatementContext nonAtomicStmtCtx;
  if (!nonAtomicSubExprs.empty()) {
    // Generate non atomic part before all the atomic operations.
    auto insertionPoint = firOpBuilder.saveInsertionPoint();
    if (atomicCaptureOp)
      firOpBuilder.setInsertionPoint(atomicCaptureOp);
    mlir::Value nonAtomicVal;
    for (auto *nonAtomicSubExpr : nonAtomicSubExprs) {
      nonAtomicVal = fir::getBase(converter.genExprValue(
          currentLocation, *nonAtomicSubExpr, nonAtomicStmtCtx));
      exprValueOverrides.try_emplace(nonAtomicSubExpr, nonAtomicVal);
    }
    if (atomicCaptureOp)
      firOpBuilder.restoreInsertionPoint(insertionPoint);
  }

  mlir::Operation *atomicUpdateOp = nullptr;
  // If no hint clause is specified, the effect is as if
  // hint(omp_sync_hint_none) had been specified.
  mlir::IntegerAttr hint = nullptr;
  mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
  if (leftHandClauseList)
    genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
                                          memoryOrder);
  if (rightHandClauseList)
    genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
                                          memoryOrder);
  atomicUpdateOp = firOpBuilder.create<mlir::omp::AtomicUpdateOp>(
      currentLocation, lhsAddr, hint, memoryOrder);

  processOmpAtomicTODO(varType, loc);

  // The update region takes the old value of the variable as its single
  // block argument.
  llvm::SmallVector<mlir::Type> varTys = {varType};
  llvm::SmallVector<mlir::Location> locs = {currentLocation};
  firOpBuilder.createBlock(&atomicUpdateOp->getRegion(0), {}, varTys, locs);
  mlir::Value val =
      fir::getBase(atomicUpdateOp->getRegion(0).front().getArgument(0));

  // Inside the region, references to the updated variable must resolve to
  // the block argument rather than a fresh load.
  exprValueOverrides.try_emplace(semantics::GetExpr(assignmentStmtVariable),
                                 val);
  {
    // statement context inside the atomic block.
    converter.overrideExprValues(&exprValueOverrides);
    lower::StatementContext atomicStmtCtx;
    mlir::Value rhsExpr = fir::getBase(converter.genExprValue(
        *semantics::GetExpr(assignmentStmtExpr), atomicStmtCtx));
    mlir::Type exprType = fir::unwrapRefType(rhsExpr.getType());
    if (fir::isa_complex(exprType) && !fir::isa_complex(varType)) {
      // Emit an additional `ExtractValueOp` if the expression is of complex
      // type
      auto extract = firOpBuilder.create<fir::ExtractValueOp>(
          currentLocation,
          mlir::cast<mlir::ComplexType>(exprType).getElementType(), rhsExpr,
          firOpBuilder.getArrayAttr(
              firOpBuilder.getIntegerAttr(firOpBuilder.getIndexType(), 0)));
      mlir::Value convertResult = firOpBuilder.create<fir::ConvertOp>(
          currentLocation, varType, extract);
      firOpBuilder.create<mlir::omp::YieldOp>(currentLocation, convertResult);
    } else {
      mlir::Value convertResult =
          firOpBuilder.createConvert(currentLocation, varType, rhsExpr);
      firOpBuilder.create<mlir::omp::YieldOp>(currentLocation, convertResult);
    }
    // The overrides must not leak past this region's lowering.
    converter.resetExprOverrides();
  }
  firOpBuilder.setInsertionPointAfter(atomicUpdateOp);
}
|
|
|
|
/// Processes an atomic construct with write clause.
|
|
static void genAtomicWrite(lower::AbstractConverter &converter,
|
|
const parser::OmpAtomicWrite &atomicWrite,
|
|
mlir::Location loc) {
|
|
const parser::OmpAtomicClauseList *rightHandClauseList = nullptr;
|
|
const parser::OmpAtomicClauseList *leftHandClauseList = nullptr;
|
|
// Get the address of atomic read operands.
|
|
rightHandClauseList = &std::get<2>(atomicWrite.t);
|
|
leftHandClauseList = &std::get<0>(atomicWrite.t);
|
|
|
|
const parser::AssignmentStmt &stmt =
|
|
std::get<parser::Statement<parser::AssignmentStmt>>(atomicWrite.t)
|
|
.statement;
|
|
const evaluate::Assignment &assign = *stmt.typedAssignment->v;
|
|
lower::StatementContext stmtCtx;
|
|
// Get the value and address of atomic write operands.
|
|
mlir::Value rhsExpr =
|
|
fir::getBase(converter.genExprValue(assign.rhs, stmtCtx));
|
|
mlir::Value lhsAddr =
|
|
fir::getBase(converter.genExprAddr(assign.lhs, stmtCtx));
|
|
genAtomicWriteStatement(converter, lhsAddr, rhsExpr, leftHandClauseList,
|
|
rightHandClauseList, loc);
|
|
}
|
|
|
|
/// Processes an atomic construct with read clause.
///
/// Lowers `v = x` into an `omp.atomic.read`. When the two sides have
/// different (but compatible) types, the atomic load goes through a
/// thread-local temporary and a non-atomic conversion, since the OMPIRBuilder
/// does not convert between operand types itself.
static void genAtomicRead(lower::AbstractConverter &converter,
                          const parser::OmpAtomicRead &atomicRead,
                          mlir::Location loc) {
  const parser::OmpAtomicClauseList *rightHandClauseList = nullptr;
  const parser::OmpAtomicClauseList *leftHandClauseList = nullptr;
  // Get the address of atomic read operands.
  rightHandClauseList = &std::get<2>(atomicRead.t);
  leftHandClauseList = &std::get<0>(atomicRead.t);

  // `v = x`: the expression is the source `x`, the variable is the target
  // `v`.
  const auto &assignmentStmtExpr = std::get<parser::Expr>(
      std::get<parser::Statement<parser::AssignmentStmt>>(atomicRead.t)
          .statement.t);
  const auto &assignmentStmtVariable = std::get<parser::Variable>(
      std::get<parser::Statement<parser::AssignmentStmt>>(atomicRead.t)
          .statement.t);

  lower::StatementContext stmtCtx;
  const semantics::SomeExpr &fromExpr = *semantics::GetExpr(assignmentStmtExpr);
  mlir::Type elementType = converter.genType(fromExpr);
  mlir::Value fromAddress =
      fir::getBase(converter.genExprAddr(fromExpr, stmtCtx));
  mlir::Value toAddress = fir::getBase(converter.genExprAddr(
      *semantics::GetExpr(assignmentStmtVariable), stmtCtx));

  if (fromAddress.getType() != toAddress.getType()) {
    // Emit an implicit cast. Different yet compatible types on
    // omp.atomic.read constitute valid Fortran. The OMPIRBuilder will
    // emit atomic instructions (on primitive types) and `__atomic_load`
    // libcall (on complex type) without explicitly converting
    // between such compatible types. The OMPIRBuilder relies on the
    // frontend to resolve such inconsistencies between `omp.atomic.read `
    // operand types. Similar inconsistencies between operand types in
    // `omp.atomic.write` are resolved through implicit casting by use of typed
    // assignment (i.e. `evaluate::Assignment`). However, use of typed
    // assignment in `omp.atomic.read` (of form `v = x`) leads to an unsafe,
    // non-atomic load of `x` into a temporary `alloca`, followed by an atomic
    // read of form `v = alloca`. Hence, it is needed to perform a custom
    // implicit cast.

    // An atomic read of form `v = x` would (without implicit casting)
    // lower to `omp.atomic.read %v = %x : !fir.ref<type1>, !fir.ref<type2>,
    // type2`. This implicit casting will rather generate the following FIR:
    //
    //   %alloca = fir.alloca type2
    //   omp.atomic.read %alloca = %x : !fir.ref<type2>, !fir.ref<type2>, type2
    //   %load = fir.load %alloca : !fir.ref<type2>
    //   %cvt = fir.convert %load : (type2) -> type1
    //   fir.store %cvt to %v : !fir.ref<type1>

    // These sequence of operations is thread-safe since each thread allocates
    // the `alloca` in its stack, and performs `%alloca = %x` atomically. Once
    // safely read, each thread performs the implicit cast on the local
    // `alloca`, and writes the final result to `%v`.
    mlir::Type toType = fir::unwrapRefType(toAddress.getType());
    mlir::Type fromType = fir::unwrapRefType(fromAddress.getType());
    fir::FirOpBuilder &builder = converter.getFirOpBuilder();
    auto oldIP = builder.saveInsertionPoint();
    // The temporary must live in the function's entry (alloca) block.
    builder.setInsertionPointToStart(builder.getAllocaBlock());
    mlir::Value alloca = builder.create<fir::AllocaOp>(
        loc, fromType); // Thread scope `alloca` to atomically read `%x`.
    builder.restoreInsertionPoint(oldIP);
    genAtomicCaptureStatement(converter, fromAddress, alloca,
                              leftHandClauseList, rightHandClauseList,
                              elementType, loc);
    auto load = builder.create<fir::LoadOp>(loc, alloca);
    if (fir::isa_complex(fromType) && !fir::isa_complex(toType)) {
      // Emit an additional `ExtractValueOp` if `fromAddress` is of complex
      // type, but `toAddress` is not: keep the real part only.
      auto extract = builder.create<fir::ExtractValueOp>(
          loc, mlir::cast<mlir::ComplexType>(fromType).getElementType(), load,
          builder.getArrayAttr(
              builder.getIntegerAttr(builder.getIndexType(), 0)));
      auto cvt = builder.create<fir::ConvertOp>(loc, toType, extract);
      builder.create<fir::StoreOp>(loc, cvt, toAddress);
    } else if (!fir::isa_complex(fromType) && fir::isa_complex(toType)) {
      // Emit an additional `InsertValueOp` if `toAddress` is of complex
      // type, but `fromAddress` is not: build (cvt, 0.0).
      mlir::Value undef = builder.create<fir::UndefOp>(loc, toType);
      mlir::Type complexEleTy =
          mlir::cast<mlir::ComplexType>(toType).getElementType();
      mlir::Value cvt = builder.create<fir::ConvertOp>(loc, complexEleTy, load);
      mlir::Value zero = builder.createRealZeroConstant(loc, complexEleTy);
      mlir::Value idx0 = builder.create<fir::InsertValueOp>(
          loc, toType, undef, cvt,
          builder.getArrayAttr(
              builder.getIntegerAttr(builder.getIndexType(), 0)));
      mlir::Value idx1 = builder.create<fir::InsertValueOp>(
          loc, toType, idx0, zero,
          builder.getArrayAttr(
              builder.getIntegerAttr(builder.getIndexType(), 1)));
      builder.create<fir::StoreOp>(loc, idx1, toAddress);
    } else {
      // Same-category conversion (e.g. differing kinds).
      auto cvt = builder.create<fir::ConvertOp>(loc, toType, load);
      builder.create<fir::StoreOp>(loc, cvt, toAddress);
    }
  } else
    // Matching types: read directly into the target.
    genAtomicCaptureStatement(converter, fromAddress, toAddress,
                              leftHandClauseList, rightHandClauseList,
                              elementType, loc);
}
|
|
|
|
/// Processes an atomic construct with update clause.
|
|
static void genAtomicUpdate(lower::AbstractConverter &converter,
|
|
const parser::OmpAtomicUpdate &atomicUpdate,
|
|
mlir::Location loc) {
|
|
const parser::OmpAtomicClauseList *rightHandClauseList = nullptr;
|
|
const parser::OmpAtomicClauseList *leftHandClauseList = nullptr;
|
|
// Get the address of atomic read operands.
|
|
rightHandClauseList = &std::get<2>(atomicUpdate.t);
|
|
leftHandClauseList = &std::get<0>(atomicUpdate.t);
|
|
|
|
const auto &assignmentStmtExpr = std::get<parser::Expr>(
|
|
std::get<parser::Statement<parser::AssignmentStmt>>(atomicUpdate.t)
|
|
.statement.t);
|
|
const auto &assignmentStmtVariable = std::get<parser::Variable>(
|
|
std::get<parser::Statement<parser::AssignmentStmt>>(atomicUpdate.t)
|
|
.statement.t);
|
|
|
|
lower::StatementContext stmtCtx;
|
|
mlir::Value lhsAddr = fir::getBase(converter.genExprAddr(
|
|
*semantics::GetExpr(assignmentStmtVariable), stmtCtx));
|
|
mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
|
|
genAtomicUpdateStatement(converter, lhsAddr, varType, assignmentStmtVariable,
|
|
assignmentStmtExpr, leftHandClauseList,
|
|
rightHandClauseList, loc);
|
|
}
|
|
|
|
/// Processes an atomic construct with no clause - which implies update clause.
|
|
static void genOmpAtomic(lower::AbstractConverter &converter,
|
|
const parser::OmpAtomic &atomicConstruct,
|
|
mlir::Location loc) {
|
|
const parser::OmpAtomicClauseList &atomicClauseList =
|
|
std::get<parser::OmpAtomicClauseList>(atomicConstruct.t);
|
|
const auto &assignmentStmtExpr = std::get<parser::Expr>(
|
|
std::get<parser::Statement<parser::AssignmentStmt>>(atomicConstruct.t)
|
|
.statement.t);
|
|
const auto &assignmentStmtVariable = std::get<parser::Variable>(
|
|
std::get<parser::Statement<parser::AssignmentStmt>>(atomicConstruct.t)
|
|
.statement.t);
|
|
lower::StatementContext stmtCtx;
|
|
mlir::Value lhsAddr = fir::getBase(converter.genExprAddr(
|
|
*semantics::GetExpr(assignmentStmtVariable), stmtCtx));
|
|
mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
|
|
// If atomic-clause is not present on the construct, the behaviour is as if
|
|
// the update clause is specified (for both OpenMP and OpenACC).
|
|
genAtomicUpdateStatement(converter, lhsAddr, varType, assignmentStmtVariable,
|
|
assignmentStmtExpr, &atomicClauseList, nullptr, loc);
|
|
}
|
|
|
|
/// Processes an atomic construct with capture clause.
///
/// A capture construct consists of exactly two statements. Depending on their
/// shapes, this lowers to one of three `omp.atomic.capture` region bodies:
///   [capture-stmt, update-stmt], [capture-stmt, write-stmt], or
///   [update-stmt, capture-stmt].
static void genAtomicCapture(lower::AbstractConverter &converter,
                             const parser::OmpAtomicCapture &atomicCapture,
                             mlir::Location loc) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

  // Unpack the two statements of the construct, plus their analyzed
  // (evaluate::Assignment) forms and the raw variable/expression parse nodes.
  const parser::AssignmentStmt &stmt1 =
      std::get<parser::OmpAtomicCapture::Stmt1>(atomicCapture.t).v.statement;
  const evaluate::Assignment &assign1 = *stmt1.typedAssignment->v;
  const auto &stmt1Var{std::get<parser::Variable>(stmt1.t)};
  const auto &stmt1Expr{std::get<parser::Expr>(stmt1.t)};
  const parser::AssignmentStmt &stmt2 =
      std::get<parser::OmpAtomicCapture::Stmt2>(atomicCapture.t).v.statement;
  const evaluate::Assignment &assign2 = *stmt2.typedAssignment->v;
  const auto &stmt2Var{std::get<parser::Variable>(stmt2.t)};
  const auto &stmt2Expr{std::get<parser::Expr>(stmt2.t)};

  // Pre-evaluate expressions to be used in the various operations inside
  // `atomic.capture` since it is not desirable to have anything other than
  // a `atomic.read`, `atomic.write`, or `atomic.update` operation
  // inside `atomic.capture`
  lower::StatementContext stmtCtx;
  // LHS evaluations are common to all combinations of `atomic.capture`
  mlir::Value stmt1LHSArg =
      fir::getBase(converter.genExprAddr(assign1.lhs, stmtCtx));
  mlir::Value stmt2LHSArg =
      fir::getBase(converter.genExprAddr(assign2.lhs, stmtCtx));

  // Type information used in generation of `atomic.update` operation
  mlir::Type stmt1VarType =
      fir::getBase(converter.genExprValue(assign1.lhs, stmtCtx)).getType();
  mlir::Type stmt2VarType =
      fir::getBase(converter.genExprValue(assign2.lhs, stmtCtx)).getType();

  // Check if implicit type is needed
  if (stmt1VarType != stmt2VarType)
    TODO(loc, "atomic capture requiring implicit type casts");

  mlir::Operation *atomicCaptureOp = nullptr;
  mlir::IntegerAttr hint = nullptr;
  mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
  const parser::OmpAtomicClauseList &rightHandClauseList =
      std::get<2>(atomicCapture.t);
  const parser::OmpAtomicClauseList &leftHandClauseList =
      std::get<0>(atomicCapture.t);
  // HINT and memory-order clauses may appear on either side of the CAPTURE
  // keyword; collect from both lists.
  genOmpAtomicHintAndMemoryOrderClauses(converter, leftHandClauseList, hint,
                                        memoryOrder);
  genOmpAtomicHintAndMemoryOrderClauses(converter, rightHandClauseList, hint,
                                        memoryOrder);
  atomicCaptureOp =
      firOpBuilder.create<mlir::omp::AtomicCaptureOp>(loc, hint, memoryOrder);

  // Build the single-block region of the capture op; all atomic ops below are
  // inserted into this block.
  firOpBuilder.createBlock(&(atomicCaptureOp->getRegion(0)));
  mlir::Block &block = atomicCaptureOp->getRegion(0).back();
  firOpBuilder.setInsertionPointToStart(&block);
  if (semantics::checkForSingleVariableOnRHS(stmt1)) {
    if (semantics::checkForSymbolMatch(stmt2)) {
      // Atomic capture construct is of the form [capture-stmt, update-stmt]
      const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt1Expr);
      mlir::Type elementType = converter.genType(fromExpr);
      genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg,
                                /*leftHandClauseList=*/nullptr,
                                /*rightHandClauseList=*/nullptr, elementType,
                                loc);
      genAtomicUpdateStatement(
          converter, stmt2LHSArg, stmt2VarType, stmt2Var, stmt2Expr,
          /*leftHandClauseList=*/nullptr,
          /*rightHandClauseList=*/nullptr, loc, atomicCaptureOp);
    } else {
      // Atomic capture construct is of the form [capture-stmt, write-stmt]
      // The RHS of the write is evaluated *before* the capture op (insertion
      // point temporarily moved) so only atomic ops remain inside the region.
      firOpBuilder.setInsertionPoint(atomicCaptureOp);
      mlir::Value stmt2RHSArg =
          fir::getBase(converter.genExprValue(assign2.rhs, stmtCtx));
      firOpBuilder.setInsertionPointToStart(&block);
      const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt1Expr);
      mlir::Type elementType = converter.genType(fromExpr);
      genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg,
                                /*leftHandClauseList=*/nullptr,
                                /*rightHandClauseList=*/nullptr, elementType,
                                loc);
      genAtomicWriteStatement(converter, stmt2LHSArg, stmt2RHSArg,
                              /*leftHandClauseList=*/nullptr,
                              /*rightHandClauseList=*/nullptr, loc);
    }
  } else {
    // Atomic capture construct is of the form [update-stmt, capture-stmt]
    const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt2Expr);
    mlir::Type elementType = converter.genType(fromExpr);
    genAtomicUpdateStatement(
        converter, stmt1LHSArg, stmt1VarType, stmt1Var, stmt1Expr,
        /*leftHandClauseList=*/nullptr,
        /*rightHandClauseList=*/nullptr, loc, atomicCaptureOp);
    genAtomicCaptureStatement(converter, stmt1LHSArg, stmt2LHSArg,
                              /*leftHandClauseList=*/nullptr,
                              /*rightHandClauseList=*/nullptr, elementType,
                              loc);
  }
  // Terminate the region, then restore the insertion point to the block start
  // (matching the builder state expected by callers).
  firOpBuilder.setInsertionPointToEnd(&block);
  firOpBuilder.create<mlir::omp::TerminatorOp>(loc);
  firOpBuilder.setInsertionPointToStart(&block);
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation functions for the standalone version of constructs that can
|
|
// also be a leaf of a composite construct
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Lowers a standalone DISTRIBUTE leaf construct to an omp.distribute wrapper
/// around an omp.loop_nest.
static mlir::omp::DistributeOp genStandaloneDistribute(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  // Lower the clauses of the DISTRIBUTE construct itself.
  mlir::omp::DistributeOperands clauseOps;
  genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
                       clauseOps);

  // Privatization runs before the wrapper op is created so private variables
  // can be attached as entry block arguments.
  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           enableDelayedPrivatization, symTable);
  dsp.processStep1(&clauseOps);

  // Bounds/steps and induction variables for the associated loop nest.
  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> ivs;
  genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
                     loopNestClauseOps, ivs);

  EntryBlockArgs args;
  args.priv.syms = dsp.getDelayedPrivSymbols();
  args.priv.vars = clauseOps.privateVars;
  auto distributeOp =
      genWrapperOp<mlir::omp::DistributeOp>(converter, loc, clauseOps, args);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                loopNestClauseOps, ivs, {{distributeOp, args}},
                llvm::omp::Directive::OMPD_distribute, dsp);
  return distributeOp;
}
|
|
|
|
/// Lowers a standalone DO (worksharing-loop) leaf construct to an omp.wsloop
/// wrapper around an omp.loop_nest.
static mlir::omp::WsloopOp genStandaloneDo(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  // Lower the clauses of the DO construct itself, collecting reductions.
  mlir::omp::WsloopOperands clauseOps;
  llvm::SmallVector<const semantics::Symbol *> reductionSyms;
  genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps,
                   reductionSyms);

  // Privatization runs before the wrapper op is created so private variables
  // can be attached as entry block arguments.
  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           enableDelayedPrivatization, symTable);
  dsp.processStep1(&clauseOps);

  // Bounds/steps and induction variables for the associated loop nest.
  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> ivs;
  genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
                     loopNestClauseOps, ivs);

  EntryBlockArgs args;
  args.priv.syms = dsp.getDelayedPrivSymbols();
  args.priv.vars = clauseOps.privateVars;
  args.reduction.syms = reductionSyms;
  args.reduction.vars = clauseOps.reductionVars;
  auto wsloopOp =
      genWrapperOp<mlir::omp::WsloopOp>(converter, loc, clauseOps, args);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                loopNestClauseOps, ivs, {{wsloopOp, args}},
                llvm::omp::Directive::OMPD_do, dsp);
  return wsloopOp;
}
|
|
|
|
/// Lowers a standalone PARALLEL leaf construct to an omp.parallel operation.
static mlir::omp::ParallelOp genStandaloneParallel(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  mlir::omp::ParallelOperands clauseOps;
  llvm::SmallVector<const semantics::Symbol *> reductionSyms;
  genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps,
                     reductionSyms);

  // The data-sharing processor is only constructed when delayed privatization
  // is enabled; otherwise privatization is handled inside genParallelOp.
  std::optional<DataSharingProcessor> dsp;
  if (enableDelayedPrivatization) {
    dsp.emplace(converter, semaCtx, item->clauses, eval,
                lower::omp::isLastItemInQueue(item, queue),
                /*useDelayedPrivatization=*/true, symTable);
    dsp->processStep1(&clauseOps);
  }

  EntryBlockArgs args;
  if (dsp)
    args.priv.syms = dsp->getDelayedPrivSymbols();
  args.priv.vars = clauseOps.privateVars;
  args.reduction.syms = reductionSyms;
  args.reduction.vars = clauseOps.reductionVars;
  return genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item,
                       clauseOps, args,
                       enableDelayedPrivatization ? &dsp.value() : nullptr);
}
|
|
|
|
/// Lowers a standalone SIMD leaf construct to an omp.simd wrapper around an
/// omp.loop_nest.
static mlir::omp::SimdOp
genStandaloneSimd(lower::AbstractConverter &converter, lower::SymMap &symTable,
                  semantics::SemanticsContext &semaCtx,
                  lower::pft::Evaluation &eval, mlir::Location loc,
                  const ConstructQueue &queue,
                  ConstructQueue::const_iterator item) {
  // Lower the clauses of the SIMD construct itself, collecting reductions.
  mlir::omp::SimdOperands clauseOps;
  llvm::SmallVector<const semantics::Symbol *> reductionSyms;
  genSimdClauses(converter, semaCtx, item->clauses, loc, clauseOps,
                 reductionSyms);

  // Privatization runs before the wrapper op is created so private variables
  // can be attached as entry block arguments.
  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           enableDelayedPrivatization, symTable);
  dsp.processStep1(&clauseOps);

  // Bounds/steps and induction variables for the associated loop nest.
  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> ivs;
  genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
                     loopNestClauseOps, ivs);

  EntryBlockArgs args;
  args.priv.syms = dsp.getDelayedPrivSymbols();
  args.priv.vars = clauseOps.privateVars;
  args.reduction.syms = reductionSyms;
  args.reduction.vars = clauseOps.reductionVars;
  auto simdOp =
      genWrapperOp<mlir::omp::SimdOp>(converter, loc, clauseOps, args);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                loopNestClauseOps, ivs, {{simdOp, args}},
                llvm::omp::Directive::OMPD_simd, dsp);
  return simdOp;
}
|
|
|
|
/// Lowers a standalone TASKLOOP leaf construct to an omp.taskloop wrapper
/// around an omp.loop_nest.
static mlir::omp::TaskloopOp genStandaloneTaskloop(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    mlir::Location loc, const ConstructQueue &queue,
    ConstructQueue::const_iterator item) {
  // Lower the clauses of the TASKLOOP construct itself.
  mlir::omp::TaskloopOperands clauseOps;
  genTaskloopClauses(converter, semaCtx, item->clauses, loc, clauseOps);

  // Privatization runs before the wrapper op is created so private variables
  // can be attached as entry block arguments.
  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           enableDelayedPrivatization, symTable);
  dsp.processStep1(&clauseOps);

  // Bounds/steps and induction variables for the associated loop nest.
  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> ivs;
  genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
                     loopNestClauseOps, ivs);

  EntryBlockArgs args;
  args.priv.syms = dsp.getDelayedPrivSymbols();
  args.priv.vars = clauseOps.privateVars;

  auto taskloopOp =
      genWrapperOp<mlir::omp::TaskloopOp>(converter, loc, clauseOps, args);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                loopNestClauseOps, ivs, {{taskloopOp, args}},
                llvm::omp::Directive::OMPD_taskloop, dsp);
  return taskloopOp;
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation functions for composite constructs
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Lowers the composite DISTRIBUTE PARALLEL DO construct. The queue holds one
/// entry per leaf: the omp.parallel op is created first (it is the region
/// parent), then omp.distribute and omp.wsloop wrappers around a shared
/// omp.loop_nest.
static mlir::omp::DistributeOp genCompositeDistributeParallelDo(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs");
  ConstructQueue::const_iterator distributeItem = item;
  ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
  ConstructQueue::const_iterator doItem = std::next(parallelItem);

  // Create parent omp.parallel first.
  mlir::omp::ParallelOperands parallelClauseOps;
  llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
  genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
                     parallelClauseOps, parallelReductionSyms);

  // Privatization is driven by the innermost (DO) leaf's clauses and hoisted
  // onto the parallel op (see the per-leaf privatization TODO in the
  // dispatcher).
  DataSharingProcessor dsp(converter, semaCtx, doItem->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           /*useDelayedPrivatization=*/true, symTable);
  dsp.processStep1(&parallelClauseOps);

  EntryBlockArgs parallelArgs;
  parallelArgs.priv.syms = dsp.getDelayedPrivSymbols();
  parallelArgs.priv.vars = parallelClauseOps.privateVars;
  parallelArgs.reduction.syms = parallelReductionSyms;
  parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
  genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
                parallelClauseOps, parallelArgs, &dsp, /*isComposite=*/true);

  // Clause processing.
  mlir::omp::DistributeOperands distributeClauseOps;
  genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
                       loc, distributeClauseOps);

  mlir::omp::WsloopOperands wsloopClauseOps;
  llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
  genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
                   wsloopClauseOps, wsloopReductionSyms);

  // COLLAPSE and loop bounds come from the innermost leaf's clauses.
  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> iv;
  genLoopNestClauses(converter, semaCtx, eval, doItem->clauses, loc,
                     loopNestClauseOps, iv);

  // Operation creation.
  EntryBlockArgs distributeArgs;
  // TODO: Add private syms and vars.
  auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
      converter, loc, distributeClauseOps, distributeArgs);
  distributeOp.setComposite(/*val=*/true);

  EntryBlockArgs wsloopArgs;
  // TODO: Add private syms and vars.
  wsloopArgs.reduction.syms = wsloopReductionSyms;
  wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
  auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
      converter, loc, wsloopClauseOps, wsloopArgs);
  wsloopOp.setComposite(/*val=*/true);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, doItem,
                loopNestClauseOps, iv,
                {{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}},
                llvm::omp::Directive::OMPD_distribute_parallel_do, dsp);
  return distributeOp;
}
|
|
|
|
/// Lowers the composite DISTRIBUTE PARALLEL DO SIMD construct. The parent
/// omp.parallel op is created first, then omp.distribute, omp.wsloop and
/// omp.simd wrappers around a shared omp.loop_nest. Two data-sharing
/// processors are used: one for the PARALLEL leaf and one for the SIMD leaf.
static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs");
  ConstructQueue::const_iterator distributeItem = item;
  ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
  ConstructQueue::const_iterator doItem = std::next(parallelItem);
  ConstructQueue::const_iterator simdItem = std::next(doItem);

  // Create parent omp.parallel first.
  mlir::omp::ParallelOperands parallelClauseOps;
  llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
  genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
                     parallelClauseOps, parallelReductionSyms);

  // Pre-determined symbols are collected by the SIMD leaf's processor below,
  // not here.
  DataSharingProcessor parallelItemDSP(
      converter, semaCtx, parallelItem->clauses, eval,
      /*shouldCollectPreDeterminedSymbols=*/false,
      /*useDelayedPrivatization=*/true, symTable);
  parallelItemDSP.processStep1(&parallelClauseOps);

  EntryBlockArgs parallelArgs;
  parallelArgs.priv.syms = parallelItemDSP.getDelayedPrivSymbols();
  parallelArgs.priv.vars = parallelClauseOps.privateVars;
  parallelArgs.reduction.syms = parallelReductionSyms;
  parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
  genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
                parallelClauseOps, parallelArgs, &parallelItemDSP,
                /*isComposite=*/true);

  // Clause processing.
  mlir::omp::DistributeOperands distributeClauseOps;
  genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
                       loc, distributeClauseOps);

  mlir::omp::WsloopOperands wsloopClauseOps;
  llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
  genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
                   wsloopClauseOps, wsloopReductionSyms);

  mlir::omp::SimdOperands simdClauseOps;
  llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
  genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
                 simdReductionSyms);

  DataSharingProcessor simdItemDSP(converter, semaCtx, simdItem->clauses, eval,
                                   /*shouldCollectPreDeterminedSymbols=*/true,
                                   /*useDelayedPrivatization=*/true, symTable);
  simdItemDSP.processStep1(&simdClauseOps);

  // COLLAPSE and loop bounds come from the innermost (SIMD) leaf's clauses.
  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> iv;
  genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
                     loopNestClauseOps, iv);

  // Operation creation.
  EntryBlockArgs distributeArgs;
  // TODO: Add private syms and vars.
  auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
      converter, loc, distributeClauseOps, distributeArgs);
  distributeOp.setComposite(/*val=*/true);

  EntryBlockArgs wsloopArgs;
  // TODO: Add private syms and vars.
  wsloopArgs.reduction.syms = wsloopReductionSyms;
  wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
  auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
      converter, loc, wsloopClauseOps, wsloopArgs);
  wsloopOp.setComposite(/*val=*/true);

  EntryBlockArgs simdArgs;
  simdArgs.priv.syms = simdItemDSP.getDelayedPrivSymbols();
  simdArgs.priv.vars = simdClauseOps.privateVars;
  simdArgs.reduction.syms = simdReductionSyms;
  simdArgs.reduction.vars = simdClauseOps.reductionVars;
  auto simdOp =
      genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
  simdOp.setComposite(/*val=*/true);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
                loopNestClauseOps, iv,
                {{distributeOp, distributeArgs},
                 {wsloopOp, wsloopArgs},
                 {simdOp, simdArgs}},
                llvm::omp::Directive::OMPD_distribute_parallel_do_simd,
                simdItemDSP);
  return distributeOp;
}
|
|
|
|
/// Lowers the composite DISTRIBUTE SIMD construct: omp.distribute and omp.simd
/// wrappers around a single shared omp.loop_nest.
static mlir::omp::DistributeOp genCompositeDistributeSimd(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
  ConstructQueue::const_iterator distributeItem = item;
  ConstructQueue::const_iterator simdItem = std::next(distributeItem);

  // Lower clauses of each leaf construct.
  mlir::omp::DistributeOperands distributeOps;
  genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
                       loc, distributeOps);

  mlir::omp::SimdOperands simdOps;
  llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
  genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdOps,
                 simdReductionSyms);

  // TODO: Support delayed privatization.
  DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           /*useDelayedPrivatization=*/false, symTable);
  dsp.processStep1();

  // Pass the innermost leaf construct's clauses because that's where COLLAPSE
  // is placed by construct decomposition.
  mlir::omp::LoopNestOperands loopNestOps;
  llvm::SmallVector<const semantics::Symbol *> ivs;
  genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
                     loopNestOps, ivs);

  // Create both wrapper ops, marking them as parts of a composite construct.
  EntryBlockArgs distributeArgs;
  // TODO: Add private syms and vars.
  auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
      converter, loc, distributeOps, distributeArgs);
  distributeOp.setComposite(/*val=*/true);

  EntryBlockArgs simdArgs;
  // TODO: Add private syms and vars.
  simdArgs.reduction.syms = simdReductionSyms;
  simdArgs.reduction.vars = simdOps.reductionVars;
  auto simdOp =
      genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdOps, simdArgs);
  simdOp.setComposite(/*val=*/true);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
                loopNestOps, ivs,
                {{distributeOp, distributeArgs}, {simdOp, simdArgs}},
                llvm::omp::Directive::OMPD_distribute_simd, dsp);
  return distributeOp;
}
|
|
|
|
/// Lowers the composite DO SIMD construct: omp.wsloop and omp.simd wrappers
/// around a single shared omp.loop_nest.
static mlir::omp::WsloopOp genCompositeDoSimd(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
  ConstructQueue::const_iterator doItem = item;
  ConstructQueue::const_iterator simdItem = std::next(doItem);

  // Lower clauses of each leaf construct.
  mlir::omp::WsloopOperands wsloopOps;
  llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
  genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc, wsloopOps,
                   wsloopReductionSyms);

  mlir::omp::SimdOperands simdOps;
  llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
  genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdOps,
                 simdReductionSyms);

  // TODO: Support delayed privatization.
  DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           /*useDelayedPrivatization=*/false, symTable);
  dsp.processStep1();

  // Pass the innermost leaf construct's clauses because that's where COLLAPSE
  // is placed by construct decomposition.
  mlir::omp::LoopNestOperands loopNestOps;
  llvm::SmallVector<const semantics::Symbol *> ivs;
  genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
                     loopNestOps, ivs);

  // Create both wrapper ops, marking them as parts of a composite construct.
  EntryBlockArgs wsloopArgs;
  // TODO: Add private syms and vars.
  wsloopArgs.reduction.syms = wsloopReductionSyms;
  wsloopArgs.reduction.vars = wsloopOps.reductionVars;
  auto wsloopOp =
      genWrapperOp<mlir::omp::WsloopOp>(converter, loc, wsloopOps, wsloopArgs);
  wsloopOp.setComposite(/*val=*/true);

  EntryBlockArgs simdArgs;
  // TODO: Add private syms and vars.
  simdArgs.reduction.syms = simdReductionSyms;
  simdArgs.reduction.vars = simdOps.reductionVars;
  auto simdOp =
      genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdOps, simdArgs);
  simdOp.setComposite(/*val=*/true);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
                loopNestOps, ivs, {{wsloopOp, wsloopArgs}, {simdOp, simdArgs}},
                llvm::omp::Directive::OMPD_do_simd, dsp);
  return wsloopOp;
}
|
|
|
|
/// Lowers the composite TASKLOOP SIMD construct. Not implemented yet; emits a
/// TODO diagnostic (which does not return), so the trailing return is never
/// reached and only silences compiler warnings.
static mlir::omp::TaskloopOp genCompositeTaskloopSimd(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
  TODO(loc, "Composite TASKLOOP SIMD");
  return nullptr;
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Dispatch
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Attempts to lower the leaf sequence starting at \p item as a composite
/// construct. On success, stores the created operation in \p newOp and returns
/// true; returns false when the sequence is not a known composite.
static bool genOMPCompositeDispatch(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item,
    mlir::Operation *&newOp) {
  using llvm::omp::Directive;
  using lower::omp::matchLeafSequence;

  // TODO: Privatization for composite constructs is currently only done based
  // on the clauses for their last leaf construct, which may not always be
  // correct. Consider per-leaf privatization of composite constructs once
  // delayed privatization is supported by all participating ops.
  if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do)) {
    newOp = genCompositeDistributeParallelDo(converter, symTable, stmtCtx,
                                             semaCtx, eval, loc, queue, item);
    return true;
  }
  if (matchLeafSequence(item, queue,
                        Directive::OMPD_distribute_parallel_do_simd)) {
    newOp = genCompositeDistributeParallelDoSimd(
        converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
    return true;
  }
  if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd)) {
    newOp = genCompositeDistributeSimd(converter, symTable, stmtCtx, semaCtx,
                                       eval, loc, queue, item);
    return true;
  }
  if (matchLeafSequence(item, queue, Directive::OMPD_do_simd)) {
    newOp = genCompositeDoSimd(converter, symTable, stmtCtx, semaCtx, eval, loc,
                               queue, item);
    return true;
  }
  if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd)) {
    newOp = genCompositeTaskloopSimd(converter, symTable, stmtCtx, semaCtx,
                                     eval, loc, queue, item);
    return true;
  }

  return false;
}
|
|
|
|
/// Dispatches lowering of the leaf construct at \p item in the construct
/// queue: composite sequences are tried first for loop-associated leaves,
/// then each standalone directive is handled by its dedicated generator.
static void genOMPDispatch(lower::AbstractConverter &converter,
                           lower::SymMap &symTable,
                           semantics::SemanticsContext &semaCtx,
                           lower::pft::Evaluation &eval, mlir::Location loc,
                           const ConstructQueue &queue,
                           ConstructQueue::const_iterator item) {
  assert(item != queue.end());

  lower::StatementContext stmtCtx;
  mlir::Operation *newOp = nullptr;

  // Generate cleanup code for the stmtCtx after newOp
  auto finalizeStmtCtx = [&]() {
    if (newOp) {
      fir::FirOpBuilder &builder = converter.getFirOpBuilder();
      fir::FirOpBuilder::InsertionGuard guard(builder);
      builder.setInsertionPointAfter(newOp);
      stmtCtx.finalizeAndPop();
    }
  };

  // Loop-associated leaves may start a composite construct; try the composite
  // dispatcher first. A fresh symbol scope covers the loop's privatized
  // induction variables.
  bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) ==
                  llvm::omp::Association::Loop;
  if (loopLeaf) {
    symTable.pushScope();
    if (genOMPCompositeDispatch(converter, symTable, stmtCtx, semaCtx, eval,
                                loc, queue, item, newOp)) {
      symTable.popScope();
      finalizeStmtCtx();
      return;
    }
  }

  switch (llvm::omp::Directive dir = item->id) {
  case llvm::omp::Directive::OMPD_barrier:
    newOp = genBarrierOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_distribute:
    newOp = genStandaloneDistribute(converter, symTable, stmtCtx, semaCtx, eval,
                                    loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_do:
    newOp = genStandaloneDo(converter, symTable, stmtCtx, semaCtx, eval, loc,
                            queue, item);
    break;
  case llvm::omp::Directive::OMPD_loop:
    newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_masked:
    newOp = genMaskedOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
                        item);
    break;
  case llvm::omp::Directive::OMPD_master:
    newOp = genMasterOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_ordered:
    // Block-associated "ordered" construct.
    newOp = genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, queue,
                               item);
    break;
  case llvm::omp::Directive::OMPD_parallel:
    newOp = genStandaloneParallel(converter, symTable, stmtCtx, semaCtx, eval,
                                  loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_scan:
    newOp = genScanOp(converter, symTable, semaCtx, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_section:
    llvm_unreachable("genOMPDispatch: OMPD_section");
    // Lowered in the enclosing genSectionsOp.
    break;
  case llvm::omp::Directive::OMPD_sections:
    // Called directly from genOMP([...], OpenMPSectionsConstruct) because it
    // has a different prototype.
    // This code path is still taken when iterating through the construct queue
    // in genBodyOfOp
    break;
  case llvm::omp::Directive::OMPD_simd:
    newOp =
        genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_scope:
    newOp = genScopeOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_single:
    newOp = genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_target:
    newOp = genTargetOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
                        item);
    break;
  case llvm::omp::Directive::OMPD_target_data:
    newOp = genTargetDataOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
                            queue, item);
    break;
  case llvm::omp::Directive::OMPD_target_enter_data:
    newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
        converter, symTable, stmtCtx, semaCtx, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_target_exit_data:
    newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
        converter, symTable, stmtCtx, semaCtx, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_target_update:
    newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
        converter, symTable, stmtCtx, semaCtx, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_task:
    newOp = genTaskOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
                      item);
    break;
  case llvm::omp::Directive::OMPD_taskgroup:
    newOp =
        genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_taskloop:
    newOp = genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc,
                                  queue, item);
    break;
  case llvm::omp::Directive::OMPD_taskwait:
    newOp = genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_taskyield:
    newOp =
        genTaskyieldOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_teams:
    newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
                       item);
    break;
  case llvm::omp::Directive::OMPD_tile:
  case llvm::omp::Directive::OMPD_unroll:
    TODO(loc, "Unhandled loop directive (" +
                  llvm::omp::getOpenMPDirectiveName(dir) + ")");
  // case llvm::omp::Directive::OMPD_workdistribute:
  case llvm::omp::Directive::OMPD_workshare:
    newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
                           queue, item);
    break;
  default:
    // Combined and composite constructs should have been split into a sequence
    // of leaf constructs when building the construct queue.
    assert(!llvm::omp::isLeafConstruct(dir) &&
           "Unexpected compound construct.");
    break;
  }

  finalizeStmtCtx();
  if (loopLeaf)
    symTable.popScope();
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// OpenMPDeclarativeConstruct visitors
|
|
//===----------------------------------------------------------------------===//
|
|
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
|
|
semantics::SemanticsContext &semaCtx,
|
|
lower::pft::Evaluation &eval,
|
|
const parser::OpenMPUtilityConstruct &);
|
|
|
|
// Declarative ALLOCATE directive: lowering not implemented yet; emits a
// fatal "not yet implemented" diagnostic at the current location.
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
       semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
       const parser::OpenMPDeclarativeAllocate &declarativeAllocate) {
  TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate");
}
|
|
|
|
// Declarative ASSUMES directive: lowering not implemented yet; emits a
// fatal "not yet implemented" diagnostic at the current location.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPDeclarativeAssumes &assumesConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMP ASSUMES declaration");
}
|
|
|
|
// DECLARE VARIANT directive: lowering not implemented yet; emits a fatal
// "not yet implemented" diagnostic at the current location.
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
       semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
       const parser::OmpDeclareVariantDirective &declareVariantDirective) {
  TODO(converter.getCurrentLocation(), "OmpDeclareVariantDirective");
}
|
|
|
|
// DECLARE REDUCTION construct: lowering not implemented yet; emits a fatal
// "not yet implemented" diagnostic at the current location.
static void genOMP(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    const parser::OpenMPDeclareReductionConstruct &declareReductionConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct");
}
|
|
|
|
// DECLARE SIMD construct: lowering not implemented yet; emits a fatal
// "not yet implemented" diagnostic at the current location.
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
       semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
       const parser::OpenMPDeclareSimdConstruct &declareSimdConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct");
}
|
|
|
|
// Lower a DECLARE MAPPER construct into an omp.declare_mapper operation at
// module scope, with a region describing the map clauses for the mapped
// derived-type variable.
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
       semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
       const parser::OpenMPDeclareMapperConstruct &declareMapperConstruct) {
  mlir::Location loc = converter.genLocation(declareMapperConstruct.source);
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  lower::StatementContext stmtCtx;
  // The mapper specifier carries an optional mapper name, the mapped type,
  // and the name of the dummy variable used inside the map clauses.
  const auto &spec =
      std::get<parser::OmpMapperSpecifier>(declareMapperConstruct.t);
  const auto &mapperName{std::get<std::optional<parser::Name>>(spec.t)};
  const auto &varType{std::get<parser::TypeSpec>(spec.t)};
  const auto &varName{std::get<parser::Name>(spec.t)};
  assert(varType.declTypeSpec->category() ==
             semantics::DeclTypeSpec::Category::TypeDerived &&
         "Expected derived type");

  // Mangle the mapper name; an unnamed mapper becomes "<type>.default"
  // mangled in the scope of the derived type.
  std::string mapperNameStr;
  if (mapperName.has_value()) {
    mapperNameStr = mapperName->ToString();
    mapperNameStr =
        converter.mangleName(mapperNameStr, mapperName->symbol->owner());
  } else {
    mapperNameStr =
        varType.declTypeSpec->derivedTypeSpec().name().ToString() + ".default";
    mapperNameStr = converter.mangleName(
        mapperNameStr, *varType.declTypeSpec->derivedTypeSpec().GetScope());
  }

  // Save current insertion point before moving to the module scope to create
  // the DeclareMapperOp
  mlir::OpBuilder::InsertionGuard guard(firOpBuilder);

  firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody());
  auto mlirType = converter.genType(varType.declTypeSpec->derivedTypeSpec());
  auto declMapperOp = firOpBuilder.create<mlir::omp::DeclareMapperOp>(
      loc, mapperNameStr, mlirType);
  auto &region = declMapperOp.getRegion();
  firOpBuilder.createBlock(&region);
  // The region's block argument stands for the variable being mapped; bind
  // it to the dummy variable's symbol so clause lowering can reference it.
  auto varVal = region.addArgument(firOpBuilder.getRefType(mlirType), loc);
  converter.bindSymbol(*varName.symbol, varVal);

  // Populate the declareMapper region with the map information.
  mlir::omp::DeclareMapperInfoOperands clauseOps;
  const auto *clauseList{
      parser::Unwrap<parser::OmpClauseList>(declareMapperConstruct.t)};
  List<Clause> clauses = makeClauses(*clauseList, semaCtx);
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processMap(loc, stmtCtx, clauseOps);
  firOpBuilder.create<mlir::omp::DeclareMapperInfoOp>(loc, clauseOps.mapVars);
}
|
|
|
|
// Lower DECLARE TARGET: attach declare-target attributes to each named
// symbol's module-level operation, when that operation has been emitted.
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
       semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
       const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
  mlir::omp::DeclareTargetOperands clauseOps;
  llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
  mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
  // Collect the device type and the (symbol, capture clause) pairs named by
  // this construct.
  getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
                       clauseOps, symbolAndClause);

  for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
    mlir::Operation *op = mod.lookupSymbol(
        converter.mangleName(std::get<const semantics::Symbol &>(symClause)));

    // Some symbols are deferred until later in the module, these are handled
    // upon finalization of the module for OpenMP inside of Bridge, so we simply
    // skip for now.
    if (!op)
      continue;

    markDeclareTarget(
        op, converter,
        std::get<mlir::omp::DeclareTargetCaptureClause>(symClause),
        clauseOps.deviceType);
  }
}
|
|
|
|
// REQUIRES directive: intentionally a no-op here.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPRequiresConstruct &requiresConstruct) {
  // Requires directives are gathered and processed in semantics and
  // then combined in the lowering bridge before triggering codegen
  // just once. Hence, there is no need to lower each individual
  // occurrence here.
}
|
|
|
|
// THREADPRIVATE directive: intentionally a no-op here; see
// genThreadprivateOp, which is invoked during variable instantiation.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPThreadprivate &threadprivate) {
  // The directive is lowered when instantiating the variable to
  // support the case of threadprivate variable declared in module.
}
|
|
|
|
// METADIRECTIVE: lowering not implemented yet; emits a fatal "not yet
// implemented" diagnostic at the current location.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OmpMetadirectiveDirective &meta) {
  TODO(converter.getCurrentLocation(), "METADIRECTIVE");
}
|
|
|
|
// Dispatch a declarative construct to the genOMP overload that matches the
// alternative currently held by the parse-tree variant.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPDeclarativeConstruct &ompDeclConstruct) {
  auto dispatch = [&](const auto &variant) {
    genOMP(converter, symTable, semaCtx, eval, variant);
  };
  Fortran::common::visit(dispatch, ompDeclConstruct.u);
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// OpenMPStandaloneConstruct visitors
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Lower a simple standalone construct (barrier, taskwait, target update,
// ordered, ...) by building a construct queue and dispatching on it.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPSimpleStandaloneConstruct &construct) {
  const auto &directive = std::get<parser::OmpDirectiveName>(construct.v.t);
  List<Clause> clauses = makeClauses(construct.v.Clauses(), semaCtx);
  mlir::Location currentLocation = converter.genLocation(directive.source);

  ConstructQueue queue{
      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
                          eval, directive.source, directive.v, clauses)};
  if (directive.v == llvm::omp::Directive::OMPD_ordered) {
    // Standalone "ordered" directive.
    genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, queue,
                 queue.begin());
  } else {
    // Dispatch handles the "block-associated" variant of "ordered".
    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                   queue.begin());
  }
}
|
|
|
|
// Lower a FLUSH construct: collect the (optional) flushed objects and
// clauses, then emit the flush operation.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPFlushConstruct &construct) {
  const auto &argumentList = construct.v.Arguments();
  const auto &clauseList = construct.v.Clauses();
  ObjectList objects = makeObjects(argumentList, semaCtx);
  List<Clause> clauses =
      makeList(clauseList.v, [&](auto &&s) { return makeClause(s, semaCtx); });
  mlir::Location currentLocation = converter.genLocation(construct.source);

  ConstructQueue queue{buildConstructQueue(
      converter.getFirOpBuilder().getModule(), semaCtx, eval, construct.source,
      llvm::omp::Directive::OMPD_flush, clauses)};
  genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
             queue, queue.begin());
}
|
|
|
|
// Lower a CANCEL construct into omp.cancel.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPCancelConstruct &cancelConstruct) {
  List<Clause> clauses = makeList(cancelConstruct.v.Clauses().v, [&](auto &&s) {
    return makeClause(s, semaCtx);
  });
  mlir::Location loc = converter.genLocation(cancelConstruct.source);

  ConstructQueue queue{buildConstructQueue(
      converter.getFirOpBuilder().getModule(), semaCtx, eval,
      cancelConstruct.source, llvm::omp::Directive::OMPD_cancel, clauses)};
  genCancelOp(converter, semaCtx, eval, loc, queue, queue.begin());
}
|
|
|
|
// Lower a CANCELLATION POINT construct into omp.cancellation_point.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPCancellationPointConstruct
                       &cancellationPointConstruct) {
  List<Clause> clauses =
      makeList(cancellationPointConstruct.v.Clauses().v,
               [&](auto &&s) { return makeClause(s, semaCtx); });
  mlir::Location loc = converter.genLocation(cancellationPointConstruct.source);

  // NOTE(review): the queue is built with OMPD_cancel rather than a
  // cancellation-point directive id — verify this is intentional.
  ConstructQueue queue{
      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
                          eval, cancellationPointConstruct.source,
                          llvm::omp::Directive::OMPD_cancel, clauses)};
  genCancellationPointOp(converter, semaCtx, eval, loc, queue, queue.begin());
}
|
|
|
|
// DEPOBJ construct: lowering not implemented yet. The arguments/clauses are
// still materialized so that clause-conversion problems surface in testing.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPDepobjConstruct &construct) {
  // These values will be ignored until the construct itself is implemented,
  // but run them anyway for the sake of testing (via a Todo test).
  ObjectList objects = makeObjects(construct.v.Arguments(), semaCtx);
  assert(objects.size() == 1);
  List<Clause> clauses = makeClauses(construct.v.Clauses(), semaCtx);
  assert(clauses.size() == 1);
  (void)objects;
  (void)clauses;

  TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct");
}
|
|
|
|
// INTEROP construct: lowering not implemented yet; emits a fatal "not yet
// implemented" diagnostic at the current location.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPInteropConstruct &interopConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMPInteropConstruct");
}
|
|
|
|
static void
|
|
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
|
|
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
|
|
const parser::OpenMPStandaloneConstruct &standaloneConstruct) {
|
|
Fortran::common::visit(
|
|
[&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
|
|
standaloneConstruct.u);
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// OpenMPConstruct visitors
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// ALLOCATORS construct: lowering not implemented yet; emits a fatal "not
// yet implemented" diagnostic at the current location.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPAllocatorsConstruct &allocsConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct");
}
|
|
|
|
// Lower an ATOMIC construct, forwarding to the handler for the specific
// atomic flavor (read/write/plain/update/capture); atomic compare is not
// implemented yet.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPAtomicConstruct &atomicConstruct) {
  Fortran::common::visit(
      common::visitors{
          [&](const parser::OmpAtomicRead &atomicRead) {
            mlir::Location loc = converter.genLocation(atomicRead.source);
            genAtomicRead(converter, atomicRead, loc);
          },
          [&](const parser::OmpAtomicWrite &atomicWrite) {
            mlir::Location loc = converter.genLocation(atomicWrite.source);
            genAtomicWrite(converter, atomicWrite, loc);
          },
          [&](const parser::OmpAtomic &atomicConstruct) {
            // Plain "atomic" with no explicit sub-clause.
            mlir::Location loc = converter.genLocation(atomicConstruct.source);
            genOmpAtomic(converter, atomicConstruct, loc);
          },
          [&](const parser::OmpAtomicUpdate &atomicUpdate) {
            mlir::Location loc = converter.genLocation(atomicUpdate.source);
            genAtomicUpdate(converter, atomicUpdate, loc);
          },
          [&](const parser::OmpAtomicCapture &atomicCapture) {
            mlir::Location loc = converter.genLocation(atomicCapture.source);
            genAtomicCapture(converter, atomicCapture, loc);
          },
          [&](const parser::OmpAtomicCompare &atomicCompare) {
            mlir::Location loc = converter.genLocation(atomicCompare.source);
            TODO(loc, "OpenMP atomic compare");
          },
      },
      atomicConstruct.u);
}
|
|
|
|
// Lower a block-associated construct (parallel, target, task, teams, ...):
// gather clauses from the begin and end directives, reject unimplemented
// clauses with a TODO diagnostic, then build the construct queue and
// dispatch on it.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPBlockConstruct &blockConstruct) {
  const auto &beginBlockDirective =
      std::get<parser::OmpBeginBlockDirective>(blockConstruct.t);
  const auto &endBlockDirective =
      std::get<parser::OmpEndBlockDirective>(blockConstruct.t);
  mlir::Location currentLocation =
      converter.genLocation(beginBlockDirective.source);
  // Extract the directive once: it is needed for the block-construct check
  // below and for building the construct queue at the end.
  const parser::OmpBlockDirective &blockDirective =
      std::get<parser::OmpBlockDirective>(beginBlockDirective.t);
  llvm::omp::Directive directive = blockDirective.v;
  // Clauses may appear on both the begin and the end directive.
  List<Clause> clauses = makeClauses(
      std::get<parser::OmpClauseList>(beginBlockDirective.t), semaCtx);
  clauses.append(makeClauses(
      std::get<parser::OmpClauseList>(endBlockDirective.t), semaCtx));

  assert(llvm::omp::blockConstructSet.test(directive) &&
         "Expected block construct");

  // Any clause not in the allow-list below has no lowering support yet and
  // triggers a "not yet implemented" diagnostic.
  for (const Clause &clause : clauses) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (!std::holds_alternative<clause::Affinity>(clause.u) &&
        !std::holds_alternative<clause::Allocate>(clause.u) &&
        !std::holds_alternative<clause::Copyin>(clause.u) &&
        !std::holds_alternative<clause::Copyprivate>(clause.u) &&
        !std::holds_alternative<clause::Default>(clause.u) &&
        !std::holds_alternative<clause::Depend>(clause.u) &&
        !std::holds_alternative<clause::Filter>(clause.u) &&
        !std::holds_alternative<clause::Final>(clause.u) &&
        !std::holds_alternative<clause::Firstprivate>(clause.u) &&
        !std::holds_alternative<clause::HasDeviceAddr>(clause.u) &&
        !std::holds_alternative<clause::If>(clause.u) &&
        !std::holds_alternative<clause::IsDevicePtr>(clause.u) &&
        !std::holds_alternative<clause::Map>(clause.u) &&
        !std::holds_alternative<clause::Nowait>(clause.u) &&
        !std::holds_alternative<clause::NumTeams>(clause.u) &&
        !std::holds_alternative<clause::NumThreads>(clause.u) &&
        !std::holds_alternative<clause::OmpxBare>(clause.u) &&
        !std::holds_alternative<clause::Priority>(clause.u) &&
        !std::holds_alternative<clause::Private>(clause.u) &&
        !std::holds_alternative<clause::ProcBind>(clause.u) &&
        !std::holds_alternative<clause::Reduction>(clause.u) &&
        !std::holds_alternative<clause::Shared>(clause.u) &&
        !std::holds_alternative<clause::Simd>(clause.u) &&
        !std::holds_alternative<clause::ThreadLimit>(clause.u) &&
        !std::holds_alternative<clause::Threads>(clause.u) &&
        !std::holds_alternative<clause::UseDeviceAddr>(clause.u) &&
        !std::holds_alternative<clause::UseDevicePtr>(clause.u) &&
        !std::holds_alternative<clause::InReduction>(clause.u) &&
        !std::holds_alternative<clause::Mergeable>(clause.u) &&
        !std::holds_alternative<clause::Untied>(clause.u) &&
        !std::holds_alternative<clause::TaskReduction>(clause.u) &&
        !std::holds_alternative<clause::Detach>(clause.u)) {
      std::string name =
          parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(clause.id));
      TODO(clauseLocation, name + " clause is not implemented yet");
    }
  }

  ConstructQueue queue{
      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
                          eval, blockDirective.source, directive, clauses)};
  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                 queue.begin());
}
|
|
|
|
// ASSUME construct: lowering not implemented yet; emits a fatal "not yet
// implemented" diagnostic at the construct's source location.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPAssumeConstruct &assumeConstruct) {
  mlir::Location clauseLocation = converter.genLocation(assumeConstruct.source);
  TODO(clauseLocation, "OpenMP ASSUME construct");
}
|
|
|
|
// Lower a CRITICAL construct into omp.critical, passing along the optional
// critical-section name.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPCriticalConstruct &criticalConstruct) {
  const auto &cd = std::get<parser::OmpCriticalDirective>(criticalConstruct.t);
  List<Clause> clauses =
      makeClauses(std::get<parser::OmpClauseList>(cd.t), semaCtx);

  ConstructQueue queue{buildConstructQueue(
      converter.getFirOpBuilder().getModule(), semaCtx, eval, cd.source,
      llvm::omp::Directive::OMPD_critical, clauses)};

  const auto &name = std::get<std::optional<parser::Name>>(cd.t);
  mlir::Location currentLocation = converter.getCurrentLocation();
  genCriticalOp(converter, symTable, semaCtx, eval, currentLocation, queue,
                queue.begin(), name);
}
|
|
|
|
// Utility construct (error/nothing): lowering not implemented yet; emits a
// fatal "not yet implemented" diagnostic at the current location.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPUtilityConstruct &) {
  TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct");
}
|
|
|
|
// DISPATCH construct: lowering not implemented yet; emits a fatal "not yet
// implemented" diagnostic at the current location.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPDispatchConstruct &) {
  TODO(converter.getCurrentLocation(), "OpenMPDispatchConstruct");
}
|
|
|
|
// Executable ALLOCATE directive: lowering not implemented yet; emits a
// fatal "not yet implemented" diagnostic at the current location.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPExecutableAllocate &execAllocConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate");
}
|
|
|
|
// Lower a loop-associated construct (do, simd, distribute, ...): combine
// clauses from the begin and (optional) end directives, then build the
// construct queue and dispatch on it.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPLoopConstruct &loopConstruct) {
  const auto &beginLoopDirective =
      std::get<parser::OmpBeginLoopDirective>(loopConstruct.t);
  List<Clause> clauses = makeClauses(
      std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
  // The end directive is optional for loop constructs; its clauses (if any)
  // are appended to the begin directive's clauses.
  if (auto &endLoopDirective =
          std::get<std::optional<parser::OmpEndLoopDirective>>(
              loopConstruct.t)) {
    clauses.append(makeClauses(
        std::get<parser::OmpClauseList>(endLoopDirective->t), semaCtx));
  }

  mlir::Location currentLocation =
      converter.genLocation(beginLoopDirective.source);

  llvm::omp::Directive directive =
      std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
  const parser::CharBlock &source =
      std::get<parser::OmpLoopDirective>(beginLoopDirective.t).source;
  ConstructQueue queue{
      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
                          eval, source, directive, clauses)};
  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                 queue.begin());
}
|
|
|
|
// SECTION directive: intentionally a no-op here.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPSectionConstruct &sectionConstruct) {
  // Do nothing here. SECTION is lowered inside of the lowering for Sections
}
|
|
|
|
// Lower a SECTIONS construct. Leaf constructs that precede "sections" in
// the queue (e.g. a combined PARALLEL) are dispatched normally; the
// "sections" leaf itself is lowered via genSectionsOp so the section blocks
// can be passed along.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPSectionsConstruct &sectionsConstruct) {
  const auto &beginSectionsDirective =
      std::get<parser::OmpBeginSectionsDirective>(sectionsConstruct.t);
  List<Clause> clauses = makeClauses(
      std::get<parser::OmpClauseList>(beginSectionsDirective.t), semaCtx);
  const auto &endSectionsDirective =
      std::get<parser::OmpEndSectionsDirective>(sectionsConstruct.t);
  const auto &sectionBlocks =
      std::get<parser::OmpSectionBlocks>(sectionsConstruct.t);
  clauses.append(makeClauses(
      std::get<parser::OmpClauseList>(endSectionsDirective.t), semaCtx));
  mlir::Location currentLocation = converter.getCurrentLocation();

  llvm::omp::Directive directive =
      std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v;
  const parser::CharBlock &source =
      std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).source;
  ConstructQueue queue{
      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
                          eval, source, directive, clauses)};
  ConstructQueue::iterator next = queue.begin();
  // Generate constructs that come first e.g. Parallel
  while (next != queue.end() &&
         next->id != llvm::omp::Directive::OMPD_sections) {
    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                   next);
    next = std::next(next);
  }

  // call genSectionsOp directly (not via genOMPDispatch) so that we can add the
  // sectionBlocks argument
  assert(next != queue.end());
  assert(next->id == llvm::omp::Directive::OMPD_sections);
  genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, queue,
                next, sectionBlocks);
  // "sections" must be the last leaf in the queue.
  assert(std::next(next) == queue.end());
}
|
|
|
|
// Dispatch an executable OpenMP construct to the genOMP overload that
// matches the alternative currently held by the parse-tree variant.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPConstruct &ompConstruct) {
  auto dispatch = [&](const auto &variant) {
    genOMP(converter, symTable, semaCtx, eval, variant);
  };
  Fortran::common::visit(dispatch, ompConstruct.u);
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Public functions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Create the region terminator appropriate for `op`: regions of atomic
// update, declare reduction and loop nest operations end in omp.yield,
// every other OpenMP region ends in omp.terminator.
mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
                                                     mlir::Operation *op,
                                                     mlir::Location loc) {
  bool needsYield =
      mlir::isa<mlir::omp::AtomicUpdateOp, mlir::omp::DeclareReductionOp,
                mlir::omp::LoopNestOp>(op);
  if (needsYield)
    return builder.create<mlir::omp::YieldOp>(loc);
  return builder.create<mlir::omp::TerminatorOp>(loc);
}
|
|
|
|
// Entry point for lowering an executable OpenMP construct. Symbol mappings
// made during lowering are scoped to this construct via SymMapScope.
void Fortran::lower::genOpenMPConstruct(lower::AbstractConverter &converter,
                                        lower::SymMap &symTable,
                                        semantics::SemanticsContext &semaCtx,
                                        lower::pft::Evaluation &eval,
                                        const parser::OpenMPConstruct &omp) {
  lower::SymMapScope scope(symTable);
  genOMP(converter, symTable, semaCtx, eval, omp);
}
|
|
|
|
// Entry point for lowering a declarative OpenMP construct, followed by any
// evaluations nested under it.
void Fortran::lower::genOpenMPDeclarativeConstruct(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    const parser::OpenMPDeclarativeConstruct &omp) {
  genOMP(converter, symTable, semaCtx, eval, omp);
  genNestedEvaluations(converter, eval);
}
|
|
|
|
// Apply OpenMP-specific lowering for a variable based on flags set during
// semantics: threadprivatize it and/or emit its declare-target global.
void Fortran::lower::genOpenMPSymbolProperties(
    lower::AbstractConverter &converter, const lower::pft::Variable &var) {
  assert(var.hasSymbol() && "Expecting Symbol");
  const semantics::Symbol &sym = var.getSymbol();

  if (sym.test(semantics::Symbol::Flag::OmpThreadprivate))
    lower::genThreadprivateOp(converter, var);

  if (sym.test(semantics::Symbol::Flag::OmpDeclareTarget))
    lower::genDeclareTargetIntGlobal(converter, var);
}
|
|
|
|
// Return the constant value of the COLLAPSE clause in `clauseList`, or 1
// when no COLLAPSE clause is present.
int64_t
Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) {
  for (const parser::OmpClause &clause : clauseList.v) {
    const auto *collapse = std::get_if<parser::OmpClause::Collapse>(&clause.u);
    if (!collapse)
      continue;
    const auto *expr = semantics::GetExpr(collapse->v);
    return evaluate::ToInt64(*expr).value();
  }
  return 1;
}
|
|
|
|
// Lower the THREADPRIVATE directive for one variable: create (or reuse) an
// omp.threadprivate op for the variable's storage and rebind the symbol to
// the threadprivatized value. Handles three cases: common-block members,
// non-global main-program variables (which get an implicit-SAVE global),
// and ordinary globals.
void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter,
                                        const lower::pft::Variable &var) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  mlir::Location currentLocation = converter.getCurrentLocation();

  const semantics::Symbol &sym = var.getSymbol();
  mlir::Value symThreadprivateValue;
  if (const semantics::Symbol *common =
          semantics::FindCommonBlockContaining(sym.GetUltimate())) {
    mlir::Value commonValue = converter.getSymbolAddress(*common);
    if (mlir::isa<mlir::omp::ThreadprivateOp>(commonValue.getDefiningOp())) {
      // Generate ThreadprivateOp for a common block instead of its members and
      // only do it once for a common block.
      return;
    }
    // Generate ThreadprivateOp and rebind the common block.
    mlir::Value commonThreadprivateValue =
        firOpBuilder.create<mlir::omp::ThreadprivateOp>(
            currentLocation, commonValue.getType(), commonValue);
    converter.bindSymbol(*common, commonThreadprivateValue);
    // Generate the threadprivate value for the common block member.
    symThreadprivateValue = genCommonBlockMember(converter, currentLocation,
                                                 sym, commonThreadprivateValue);
  } else if (!var.isGlobal()) {
    // Non-global variable which can be in threadprivate directive must be one
    // variable in main program, and it has implicit SAVE attribute. Take it as
    // with SAVE attribute, so to create GlobalOp for it to simplify the
    // translation to LLVM IR.
    // A single lookup avoids both performing multiple globalInitializations
    // and querying the symbol table twice.
    auto module = converter.getModuleOp();
    std::string globalName = converter.mangleName(sym);
    fir::GlobalOp global = module.lookupSymbol<fir::GlobalOp>(globalName);
    if (!global)
      global = globalInitialization(converter, firOpBuilder, sym, var,
                                    currentLocation);

    mlir::Value symValue = firOpBuilder.create<fir::AddrOfOp>(
        currentLocation, global.resultType(), global.getSymbol());
    symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
        currentLocation, symValue.getType(), symValue);
  } else {
    mlir::Value symValue = converter.getSymbolAddress(sym);

    // The symbol may be use-associated multiple times, and nothing needs to be
    // done after the original symbol is mapped to the threadprivatized value
    // for the first time. Use the threadprivatized value directly.
    mlir::Operation *op;
    if (auto declOp = symValue.getDefiningOp<hlfir::DeclareOp>())
      op = declOp.getMemref().getDefiningOp();
    else
      op = symValue.getDefiningOp();
    if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
      return;

    symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
        currentLocation, symValue.getType(), symValue);
  }

  // Rebind the symbol so subsequent references use the threadprivate value.
  fir::ExtendedValue sexv = converter.getSymbolExtendedValue(sym);
  fir::ExtendedValue symThreadprivateExv =
      getExtendedValue(sexv, symThreadprivateValue);
  converter.bindSymbol(sym, symThreadprivateExv);
}
|
|
|
|
// This function replicates threadprivate's behaviour of generating
|
|
// an internal fir.GlobalOp for non-global variables in the main program
|
|
// that have the implicit SAVE attribute, to simplifiy LLVM-IR and MLIR
|
|
// generation.
|
|
// Emit an internal fir.GlobalOp for a non-global declare-target variable.
// A non-global variable that appears in a declare target directive must be
// a main-program variable carrying the implicit SAVE attribute, so a
// GlobalOp is created for it to simplify translation to LLVM IR. Global
// variables need no extra work here.
void Fortran::lower::genDeclareTargetIntGlobal(
    lower::AbstractConverter &converter, const lower::pft::Variable &var) {
  if (var.isGlobal())
    return;
  globalInitialization(converter, converter.getFirOpBuilder(), var.getSymbol(),
                       var, converter.getCurrentLocation());
}
|
|
|
|
bool Fortran::lower::isOpenMPTargetConstruct(
|
|
const parser::OpenMPConstruct &omp) {
|
|
llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown;
|
|
if (const auto *block = std::get_if<parser::OpenMPBlockConstruct>(&omp.u)) {
|
|
const auto &begin = std::get<parser::OmpBeginBlockDirective>(block->t);
|
|
dir = std::get<parser::OmpBlockDirective>(begin.t).v;
|
|
} else if (const auto *loop =
|
|
std::get_if<parser::OpenMPLoopConstruct>(&omp.u)) {
|
|
const auto &begin = std::get<parser::OmpBeginLoopDirective>(loop->t);
|
|
dir = std::get<parser::OmpLoopDirective>(begin.t).v;
|
|
}
|
|
return llvm::omp::allTargetSet.test(dir);
|
|
}
|
|
|
|
// Collect declare-target information that could not be applied yet (e.g.
// symbols whose operations are not emitted at this point) so Bridge can
// mark them at module finalization. Non-declare-target constructs are
// ignored.
void Fortran::lower::gatherOpenMPDeferredDeclareTargets(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval,
    const parser::OpenMPDeclarativeConstruct &ompDecl,
    llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo>
        &deferredDeclareTarget) {
  Fortran::common::visit(
      common::visitors{
          [&](const parser::OpenMPDeclareTargetConstruct &ompReq) {
            collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq,
                                          deferredDeclareTarget);
          },
          [&](const auto &) {},
      },
      ompDecl.u);
}
|
|
|
|
// Return true when `ompDecl` is a declare-target construct whose device
// type includes the device (i.e. is not host-only). Other declarative
// constructs return false.
bool Fortran::lower::isOpenMPDeviceDeclareTarget(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval,
    const parser::OpenMPDeclarativeConstruct &ompDecl) {
  return Fortran::common::visit(
      common::visitors{
          [&](const parser::OpenMPDeclareTargetConstruct &ompReq) {
            // Default to host when no device type can be determined.
            mlir::omp::DeclareTargetDeviceType targetType =
                getDeclareTargetFunctionDevice(converter, semaCtx, eval, ompReq)
                    .value_or(mlir::omp::DeclareTargetDeviceType::host);
            return targetType != mlir::omp::DeclareTargetDeviceType::host;
          },
          [&](const auto &) { return false; },
      },
      ompDecl.u);
}
|
|
|
|
// In certain cases such as subroutine or function interfaces which declare
|
|
// but do not define or directly call the subroutine or function in the same
|
|
// module, their lowering is delayed until after the declare target construct
|
|
// itself is processed, so there symbol is not within the table.
|
|
//
|
|
// This function will also return true if we encounter any device declare
|
|
// target cases, to satisfy checking if we require the requires attributes
|
|
// on the module.
|
|
bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions(
|
|
mlir::Operation *mod,
|
|
llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo> &deferredDeclareTargets,
|
|
AbstractConverter &converter) {
|
|
bool deviceCodeFound = false;
|
|
auto modOp = llvm::cast<mlir::ModuleOp>(mod);
|
|
for (auto declTar : deferredDeclareTargets) {
|
|
mlir::Operation *op = modOp.lookupSymbol(converter.mangleName(declTar.sym));
|
|
|
|
// Due to interfaces being optionally emitted on usage in a module,
|
|
// not finding an operation at this point cannot be a hard error, we
|
|
// simply ignore it for now.
|
|
// TODO: Add semantic checks for detecting cases where an erronous
|
|
// (undefined) symbol has been supplied to a declare target clause
|
|
if (!op)
|
|
continue;
|
|
|
|
auto devType = declTar.declareTargetDeviceType;
|
|
if (!deviceCodeFound && devType != mlir::omp::DeclareTargetDeviceType::host)
|
|
deviceCodeFound = true;
|
|
|
|
markDeclareTarget(op, converter, declTar.declareTargetCaptureClause,
|
|
devType);
|
|
}
|
|
|
|
return deviceCodeFound;
|
|
}
|
|
|
|
/// Translate the `omp requires` flags recorded during semantics on \p symbol
/// (if any) into the corresponding attributes on the module's
/// OffloadModuleInterface. Modules that do not implement the interface are
/// left untouched.
void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
                                       const semantics::Symbol *symbol) {
  using MlirRequires = mlir::omp::ClauseRequires;
  using SemaRequires = semantics::WithOmpDeclarative::RequiresFlag;

  auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(mod);
  if (!offloadMod)
    return;

  // Pull the requires flags gathered during semantic analysis, when the
  // symbol's details carry them.
  semantics::WithOmpDeclarative::RequiresFlags semaFlags;
  if (symbol) {
    common::visit(
        [&](const auto &details) {
          if constexpr (std::is_base_of_v<semantics::WithOmpDeclarative,
                                          std::decay_t<decltype(details)>>) {
            if (details.has_ompRequires())
              semaFlags = *details.ompRequires();
          }
        },
        symbol->details());
  }

  // Use pre-populated omp.requires module attribute if it was set, so that
  // the "-fopenmp-force-usm" compiler option is honored.
  MlirRequires mlirFlags = offloadMod.getRequires();

  // Map each semantic flag onto its MLIR counterpart.
  static constexpr std::pair<SemaRequires, MlirRequires> flagMap[] = {
      {SemaRequires::ReverseOffload, MlirRequires::reverse_offload},
      {SemaRequires::UnifiedAddress, MlirRequires::unified_address},
      {SemaRequires::UnifiedSharedMemory, MlirRequires::unified_shared_memory},
      {SemaRequires::DynamicAllocators, MlirRequires::dynamic_allocators},
  };
  for (const auto &[semaFlag, mlirFlag] : flagMap)
    if (semaFlags.test(semaFlag))
      mlirFlags = mlirFlags | mlirFlag;

  offloadMod.setRequires(mlirFlags);
}
|