This effectively implements some now deprecated OpenMP functionality that some applications (most notably at the moment GenASiS) unfortunately depend on (deprecated in specification version 5.2): "If a list item in a use_device_ptr clause is not of type C_PTR, the behavior is as if the list item appeared in a use_device_addr clause. Support for such list items in a use_device_ptr clause is deprecated." This PR downgrades the hard-error to a deprecated warning and "promotes" the above cases by simply moving the offending operands from the use_device_ptr value list to the back of the use_device_addr list (and moves the related symbols, locs and types that form the BlockArgs correspondingly) and then the generation of the target data construct proceeds as normal.
2745 lines
119 KiB
C++
2745 lines
119 KiB
C++
//===-- OpenMP.cpp -- Open MP directive lowering --------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "flang/Lower/OpenMP.h"
|
|
|
|
#include "ClauseProcessor.h"
|
|
#include "DataSharingProcessor.h"
|
|
#include "DirectivesCommon.h"
|
|
#include "ReductionProcessor.h"
|
|
#include "flang/Common/idioms.h"
|
|
#include "flang/Lower/Bridge.h"
|
|
#include "flang/Lower/ConvertExpr.h"
|
|
#include "flang/Lower/ConvertVariable.h"
|
|
#include "flang/Lower/StatementContext.h"
|
|
#include "flang/Lower/SymbolMap.h"
|
|
#include "flang/Optimizer/Builder/BoxValue.h"
|
|
#include "flang/Optimizer/Builder/FIRBuilder.h"
|
|
#include "flang/Optimizer/Builder/Todo.h"
|
|
#include "flang/Optimizer/Dialect/FIRType.h"
|
|
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
|
#include "flang/Parser/parse-tree.h"
|
|
#include "flang/Semantics/openmp-directive-sets.h"
|
|
#include "flang/Semantics/tools.h"
|
|
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
|
|
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
|
|
#include "mlir/Transforms/RegionUtils.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/Frontend/OpenMP/OMPConstants.h"
|
|
|
|
using namespace Fortran::lower::omp;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation helper functions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
static Fortran::lower::pft::Evaluation *
|
|
getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) {
|
|
// Return the Evaluation of the innermost collapsed loop, or the current one
|
|
// if there was no COLLAPSE.
|
|
if (collapseValue == 0)
|
|
return &eval;
|
|
|
|
Fortran::lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation();
|
|
for (int i = 1; i < collapseValue; i++) {
|
|
// The nested evaluations should be DoConstructs (i.e. they should form
|
|
// a loop nest). Each DoConstruct is a tuple <NonLabelDoStmt, Block,
|
|
// EndDoStmt>.
|
|
assert(curEval->isA<Fortran::parser::DoConstruct>());
|
|
curEval = &*std::next(curEval->getNestedEvaluations().begin());
|
|
}
|
|
return curEval;
|
|
}
|
|
|
|
static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
int collapseValue = 0) {
|
|
Fortran::lower::pft::Evaluation *curEval =
|
|
getCollapsedLoopEval(eval, collapseValue);
|
|
|
|
for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations())
|
|
converter.genEval(e);
|
|
}
|
|
|
|
static fir::GlobalOp globalInitialization(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym,
|
|
const Fortran::lower::pft::Variable &var, mlir::Location currentLocation) {
|
|
mlir::Type ty = converter.genType(sym);
|
|
std::string globalName = converter.mangleName(sym);
|
|
mlir::StringAttr linkage = firOpBuilder.createInternalLinkage();
|
|
fir::GlobalOp global =
|
|
firOpBuilder.createGlobal(currentLocation, ty, globalName, linkage);
|
|
|
|
// Create default initialization for non-character scalar.
|
|
if (Fortran::semantics::IsAllocatableOrObjectPointer(&sym)) {
|
|
mlir::Type baseAddrType = ty.dyn_cast<fir::BoxType>().getEleTy();
|
|
Fortran::lower::createGlobalInitialization(
|
|
firOpBuilder, global, [&](fir::FirOpBuilder &b) {
|
|
mlir::Value nullAddr =
|
|
b.createNullConstant(currentLocation, baseAddrType);
|
|
mlir::Value box =
|
|
b.create<fir::EmboxOp>(currentLocation, ty, nullAddr);
|
|
b.create<fir::HasValueOp>(currentLocation, box);
|
|
});
|
|
} else {
|
|
Fortran::lower::createGlobalInitialization(
|
|
firOpBuilder, global, [&](fir::FirOpBuilder &b) {
|
|
mlir::Value undef = b.create<fir::UndefOp>(currentLocation, ty);
|
|
b.create<fir::HasValueOp>(currentLocation, undef);
|
|
});
|
|
}
|
|
|
|
return global;
|
|
}
|
|
|
|
static mlir::Operation *getCompareFromReductionOp(mlir::Operation *reductionOp,
|
|
mlir::Value loadVal) {
|
|
for (mlir::Value reductionOperand : reductionOp->getOperands()) {
|
|
if (mlir::Operation *compareOp = reductionOperand.getDefiningOp()) {
|
|
if (compareOp->getOperand(0) == loadVal ||
|
|
compareOp->getOperand(1) == loadVal)
|
|
assert((mlir::isa<mlir::arith::CmpIOp>(compareOp) ||
|
|
mlir::isa<mlir::arith::CmpFOp>(compareOp)) &&
|
|
"Expected comparison not found in reduction intrinsic");
|
|
return compareOp;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// Get the extended value for \p val by extracting additional variable
|
|
// information from \p base.
|
|
static fir::ExtendedValue getExtendedValue(fir::ExtendedValue base,
|
|
mlir::Value val) {
|
|
return base.match(
|
|
[&](const fir::MutableBoxValue &box) -> fir::ExtendedValue {
|
|
return fir::MutableBoxValue(val, box.nonDeferredLenParams(), {});
|
|
},
|
|
[&](const auto &) -> fir::ExtendedValue {
|
|
return fir::substBase(base, val);
|
|
});
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
static bool isThreadPrivate(Fortran::lower::SymbolRef sym) {
|
|
if (const auto *details =
|
|
sym->detailsIf<Fortran::semantics::CommonBlockDetails>()) {
|
|
for (const auto &obj : details->objects())
|
|
if (!obj->test(Fortran::semantics::Symbol::Flag::OmpThreadprivate))
|
|
return false;
|
|
return true;
|
|
}
|
|
return sym->test(Fortran::semantics::Symbol::Flag::OmpThreadprivate);
|
|
}
|
|
#endif
|
|
|
|
static void threadPrivatizeVars(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::pft::Evaluation &eval) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
mlir::Location currentLocation = converter.getCurrentLocation();
|
|
mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
|
|
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
|
|
|
|
// If the symbol corresponds to the original ThreadprivateOp, use the symbol
|
|
// value from that operation to create one ThreadprivateOp copy operation
|
|
// inside the parallel region.
|
|
// In some cases, however, the symbol will correspond to the original,
|
|
// non-threadprivate variable. This can happen, for instance, with a common
|
|
// block, declared in a separate module, used by a parent procedure and
|
|
// privatized in its child procedure.
|
|
auto genThreadprivateOp = [&](Fortran::lower::SymbolRef sym) -> mlir::Value {
|
|
assert(isThreadPrivate(sym));
|
|
mlir::Value symValue = converter.getSymbolAddress(sym);
|
|
mlir::Operation *op = symValue.getDefiningOp();
|
|
if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(op))
|
|
op = declOp.getMemref().getDefiningOp();
|
|
if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
|
|
symValue = mlir::dyn_cast<mlir::omp::ThreadprivateOp>(op).getSymAddr();
|
|
return firOpBuilder.create<mlir::omp::ThreadprivateOp>(
|
|
currentLocation, symValue.getType(), symValue);
|
|
};
|
|
|
|
llvm::SetVector<const Fortran::semantics::Symbol *> threadprivateSyms;
|
|
converter.collectSymbolSet(eval, threadprivateSyms,
|
|
Fortran::semantics::Symbol::Flag::OmpThreadprivate,
|
|
/*collectSymbols=*/true,
|
|
/*collectHostAssociatedSymbols=*/true);
|
|
std::set<Fortran::semantics::SourceName> threadprivateSymNames;
|
|
|
|
// For a COMMON block, the ThreadprivateOp is generated for itself instead of
|
|
// its members, so only bind the value of the new copied ThreadprivateOp
|
|
// inside the parallel region to the common block symbol only once for
|
|
// multiple members in one COMMON block.
|
|
llvm::SetVector<const Fortran::semantics::Symbol *> commonSyms;
|
|
for (std::size_t i = 0; i < threadprivateSyms.size(); i++) {
|
|
const Fortran::semantics::Symbol *sym = threadprivateSyms[i];
|
|
mlir::Value symThreadprivateValue;
|
|
// The variable may be used more than once, and each reference has one
|
|
// symbol with the same name. Only do once for references of one variable.
|
|
if (threadprivateSymNames.find(sym->name()) != threadprivateSymNames.end())
|
|
continue;
|
|
threadprivateSymNames.insert(sym->name());
|
|
if (const Fortran::semantics::Symbol *common =
|
|
Fortran::semantics::FindCommonBlockContaining(sym->GetUltimate())) {
|
|
mlir::Value commonThreadprivateValue;
|
|
if (commonSyms.contains(common)) {
|
|
commonThreadprivateValue = converter.getSymbolAddress(*common);
|
|
} else {
|
|
commonThreadprivateValue = genThreadprivateOp(*common);
|
|
converter.bindSymbol(*common, commonThreadprivateValue);
|
|
commonSyms.insert(common);
|
|
}
|
|
symThreadprivateValue = Fortran::lower::genCommonBlockMember(
|
|
converter, currentLocation, *sym, commonThreadprivateValue);
|
|
} else {
|
|
symThreadprivateValue = genThreadprivateOp(*sym);
|
|
}
|
|
|
|
fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*sym);
|
|
fir::ExtendedValue symThreadprivateExv =
|
|
getExtendedValue(sexv, symThreadprivateValue);
|
|
converter.bindSymbol(*sym, symThreadprivateExv);
|
|
}
|
|
|
|
firOpBuilder.restoreInsertionPoint(insPt);
|
|
}
|
|
|
|
static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter,
|
|
std::size_t loopVarTypeSize) {
|
|
// OpenMP runtime requires 32-bit or 64-bit loop variables.
|
|
loopVarTypeSize = loopVarTypeSize * 8;
|
|
if (loopVarTypeSize < 32) {
|
|
loopVarTypeSize = 32;
|
|
} else if (loopVarTypeSize > 64) {
|
|
loopVarTypeSize = 64;
|
|
mlir::emitWarning(converter.getCurrentLocation(),
|
|
"OpenMP loop iteration variable cannot have more than 64 "
|
|
"bits size and will be narrowed into 64 bits.");
|
|
}
|
|
assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) &&
|
|
"OpenMP loop iteration variable size must be transformed into 32-bit "
|
|
"or 64-bit");
|
|
return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize);
|
|
}
|
|
|
|
static mlir::Operation *
|
|
createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter,
|
|
mlir::Location loc, mlir::Value indexVal,
|
|
const Fortran::semantics::Symbol *sym) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
|
|
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
|
|
|
|
mlir::Type tempTy = converter.genType(*sym);
|
|
mlir::Value temp = firOpBuilder.create<fir::AllocaOp>(
|
|
loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{},
|
|
/*shapeParams*/ mlir::ValueRange{},
|
|
llvm::ArrayRef<mlir::NamedAttribute>{
|
|
fir::getAdaptToByRefAttr(firOpBuilder)});
|
|
converter.bindSymbol(*sym, temp);
|
|
firOpBuilder.restoreInsertionPoint(insPt);
|
|
mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal);
|
|
mlir::Operation *storeOp = firOpBuilder.create<fir::StoreOp>(
|
|
loc, cvtVal, converter.getSymbolAddress(*sym));
|
|
return storeOp;
|
|
}
|
|
|
|
struct OpWithBodyGenInfo {
|
|
/// A type for a code-gen callback function. This takes as argument the op for
|
|
/// which the code is being generated and returns the arguments of the op's
|
|
/// region.
|
|
using GenOMPRegionEntryCBFn =
|
|
std::function<llvm::SmallVector<const Fortran::semantics::Symbol *>(
|
|
mlir::Operation *)>;
|
|
|
|
OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
mlir::Location loc, Fortran::lower::pft::Evaluation &eval)
|
|
: converter(converter), semaCtx(semaCtx), loc(loc), eval(eval) {}
|
|
|
|
OpWithBodyGenInfo &setGenNested(bool value) {
|
|
genNested = value;
|
|
return *this;
|
|
}
|
|
|
|
OpWithBodyGenInfo &setOuterCombined(bool value) {
|
|
outerCombined = value;
|
|
return *this;
|
|
}
|
|
|
|
OpWithBodyGenInfo &setClauses(const Fortran::parser::OmpClauseList *value) {
|
|
clauses = value;
|
|
return *this;
|
|
}
|
|
|
|
OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) {
|
|
dsp = value;
|
|
return *this;
|
|
}
|
|
|
|
OpWithBodyGenInfo &
|
|
setReductions(llvm::SmallVector<const Fortran::semantics::Symbol *> *value1,
|
|
llvm::SmallVector<mlir::Type> *value2) {
|
|
reductionSymbols = value1;
|
|
reductionTypes = value2;
|
|
return *this;
|
|
}
|
|
|
|
OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) {
|
|
genRegionEntryCB = value;
|
|
return *this;
|
|
}
|
|
|
|
/// [inout] converter to use for the clauses.
|
|
Fortran::lower::AbstractConverter &converter;
|
|
/// [in] Semantics context
|
|
Fortran::semantics::SemanticsContext &semaCtx;
|
|
/// [in] location in source code.
|
|
mlir::Location loc;
|
|
/// [in] current PFT node/evaluation.
|
|
Fortran::lower::pft::Evaluation &eval;
|
|
/// [in] whether to generate FIR for nested evaluations
|
|
bool genNested = true;
|
|
/// [in] is this an outer operation - prevents privatization.
|
|
bool outerCombined = false;
|
|
/// [in] list of clauses to process.
|
|
const Fortran::parser::OmpClauseList *clauses = nullptr;
|
|
/// [in] if provided, processes the construct's data-sharing attributes.
|
|
DataSharingProcessor *dsp = nullptr;
|
|
/// [in] if provided, list of reduction symbols
|
|
llvm::SmallVector<const Fortran::semantics::Symbol *> *reductionSymbols =
|
|
nullptr;
|
|
/// [in] if provided, list of reduction types
|
|
llvm::SmallVector<mlir::Type> *reductionTypes = nullptr;
|
|
/// [in] if provided, emits the op's region entry. Otherwise, an emtpy block
|
|
/// is created in the region.
|
|
GenOMPRegionEntryCBFn genRegionEntryCB = nullptr;
|
|
};
|
|
|
|
/// Create the body (block) for an OpenMP Operation.
|
|
///
|
|
/// \param [in] op - the operation the body belongs to.
|
|
/// \param [in] info - options controlling code-gen for the construction.
|
|
template <typename Op>
|
|
static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) {
|
|
fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder();
|
|
|
|
auto insertMarker = [](fir::FirOpBuilder &builder) {
|
|
mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(),
|
|
builder.getIndexType());
|
|
return undef.getDefiningOp();
|
|
};
|
|
|
|
// If an argument for the region is provided then create the block with that
|
|
// argument. Also update the symbol's address with the mlir argument value.
|
|
// e.g. For loops the argument is the induction variable. And all further
|
|
// uses of the induction variable should use this mlir value.
|
|
auto regionArgs =
|
|
[&]() -> llvm::SmallVector<const Fortran::semantics::Symbol *> {
|
|
if (info.genRegionEntryCB != nullptr) {
|
|
return info.genRegionEntryCB(op);
|
|
}
|
|
|
|
firOpBuilder.createBlock(&op.getRegion());
|
|
return {};
|
|
}();
|
|
// Mark the earliest insertion point.
|
|
mlir::Operation *marker = insertMarker(firOpBuilder);
|
|
|
|
// If it is an unstructured region and is not the outer region of a combined
|
|
// construct, create empty blocks for all evaluations.
|
|
if (info.eval.lowerAsUnstructured() && !info.outerCombined)
|
|
Fortran::lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp,
|
|
mlir::omp::YieldOp>(
|
|
firOpBuilder, info.eval.getNestedEvaluations());
|
|
|
|
// Start with privatization, so that the lowering of the nested
|
|
// code will use the right symbols.
|
|
constexpr bool isLoop = std::is_same_v<Op, mlir::omp::WsLoopOp> ||
|
|
std::is_same_v<Op, mlir::omp::SimdLoopOp>;
|
|
bool privatize = info.clauses && !info.outerCombined;
|
|
|
|
firOpBuilder.setInsertionPoint(marker);
|
|
std::optional<DataSharingProcessor> tempDsp;
|
|
if (privatize) {
|
|
if (!info.dsp) {
|
|
tempDsp.emplace(info.converter, *info.clauses, info.eval);
|
|
tempDsp->processStep1();
|
|
}
|
|
}
|
|
|
|
if constexpr (std::is_same_v<Op, mlir::omp::ParallelOp>) {
|
|
threadPrivatizeVars(info.converter, info.eval);
|
|
if (info.clauses) {
|
|
firOpBuilder.setInsertionPoint(marker);
|
|
ClauseProcessor(info.converter, info.semaCtx, *info.clauses)
|
|
.processCopyin();
|
|
}
|
|
}
|
|
|
|
if (info.genNested) {
|
|
// genFIR(Evaluation&) tries to patch up unterminated blocks, causing
|
|
// a lot of complications for our approach if the terminator generation
|
|
// is delayed past this point. Insert a temporary terminator here, then
|
|
// delete it.
|
|
firOpBuilder.setInsertionPointToEnd(&op.getRegion().back());
|
|
auto *temp = Fortran::lower::genOpenMPTerminator(
|
|
firOpBuilder, op.getOperation(), info.loc);
|
|
firOpBuilder.setInsertionPointAfter(marker);
|
|
genNestedEvaluations(info.converter, info.eval);
|
|
temp->erase();
|
|
}
|
|
|
|
// Get or create a unique exiting block from the given region, or
|
|
// return nullptr if there is no exiting block.
|
|
auto getUniqueExit = [&](mlir::Region ®ion) -> mlir::Block * {
|
|
// Find the blocks where the OMP terminator should go. In simple cases
|
|
// it is the single block in the operation's region. When the region
|
|
// is more complicated, especially with unstructured control flow, there
|
|
// may be multiple blocks, and some of them may have non-OMP terminators
|
|
// resulting from lowering of the code contained within the operation.
|
|
// All the remaining blocks are potential exit points from the op's region.
|
|
//
|
|
// Explicit control flow cannot exit any OpenMP region (other than via
|
|
// STOP), and that is enforced by semantic checks prior to lowering. STOP
|
|
// statements are lowered to a function call.
|
|
|
|
// Collect unterminated blocks.
|
|
llvm::SmallVector<mlir::Block *> exits;
|
|
for (mlir::Block &b : region) {
|
|
if (b.empty() || !b.back().hasTrait<mlir::OpTrait::IsTerminator>())
|
|
exits.push_back(&b);
|
|
}
|
|
|
|
if (exits.empty())
|
|
return nullptr;
|
|
// If there already is a unique exiting block, do not create another one.
|
|
// Additionally, some ops (e.g. omp.sections) require only 1 block in
|
|
// its region.
|
|
if (exits.size() == 1)
|
|
return exits[0];
|
|
mlir::Block *exit = firOpBuilder.createBlock(®ion);
|
|
for (mlir::Block *b : exits) {
|
|
firOpBuilder.setInsertionPointToEnd(b);
|
|
firOpBuilder.create<mlir::cf::BranchOp>(info.loc, exit);
|
|
}
|
|
return exit;
|
|
};
|
|
|
|
if (auto *exitBlock = getUniqueExit(op.getRegion())) {
|
|
firOpBuilder.setInsertionPointToEnd(exitBlock);
|
|
auto *term = Fortran::lower::genOpenMPTerminator(
|
|
firOpBuilder, op.getOperation(), info.loc);
|
|
// Only insert lastprivate code when there actually is an exit block.
|
|
// Such a block may not exist if the nested code produced an infinite
|
|
// loop (this may not make sense in production code, but a user could
|
|
// write that and we should handle it).
|
|
firOpBuilder.setInsertionPoint(term);
|
|
if (privatize) {
|
|
if (!info.dsp) {
|
|
assert(tempDsp.has_value());
|
|
tempDsp->processStep2(op, isLoop);
|
|
} else {
|
|
if (isLoop && regionArgs.size() > 0)
|
|
info.dsp->setLoopIV(info.converter.getSymbolAddress(*regionArgs[0]));
|
|
info.dsp->processStep2(op, isLoop);
|
|
}
|
|
}
|
|
}
|
|
|
|
firOpBuilder.setInsertionPointAfter(marker);
|
|
marker->erase();
|
|
}
|
|
|
|
static void genBodyOfTargetDataOp(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::omp::DataOp &dataOp,
|
|
const llvm::SmallVector<mlir::Type> &useDeviceTypes,
|
|
const llvm::SmallVector<mlir::Location> &useDeviceLocs,
|
|
const llvm::SmallVector<const Fortran::semantics::Symbol *>
|
|
&useDeviceSymbols,
|
|
const mlir::Location ¤tLocation) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
mlir::Region ®ion = dataOp.getRegion();
|
|
|
|
firOpBuilder.createBlock(®ion, {}, useDeviceTypes, useDeviceLocs);
|
|
|
|
for (auto [argIndex, argSymbol] : llvm::enumerate(useDeviceSymbols)) {
|
|
const mlir::BlockArgument &arg = region.front().getArgument(argIndex);
|
|
fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*argSymbol);
|
|
if (auto refType = arg.getType().dyn_cast<fir::ReferenceType>()) {
|
|
if (fir::isa_builtin_cptr_type(refType.getElementType())) {
|
|
converter.bindSymbol(*argSymbol, arg);
|
|
} else {
|
|
// Avoid capture of a reference to a structured binding.
|
|
const Fortran::semantics::Symbol *sym = argSymbol;
|
|
extVal.match(
|
|
[&](const fir::MutableBoxValue &mbv) {
|
|
converter.bindSymbol(
|
|
*sym,
|
|
fir::MutableBoxValue(
|
|
arg, fir::factory::getNonDeferredLenParams(extVal), {}));
|
|
},
|
|
[&](const auto &) {
|
|
TODO(converter.getCurrentLocation(),
|
|
"use_device clause operand unsupported type");
|
|
});
|
|
}
|
|
} else {
|
|
TODO(converter.getCurrentLocation(),
|
|
"use_device clause operand unsupported type");
|
|
}
|
|
}
|
|
|
|
// Insert dummy instruction to remember the insertion position. The
|
|
// marker will be deleted by clean up passes since there are no uses.
|
|
// Remembering the position for further insertion is important since
|
|
// there are hlfir.declares inserted above while setting block arguments
|
|
// and new code from the body should be inserted after that.
|
|
mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
|
|
dataOp.getOperation()->getLoc(), firOpBuilder.getIndexType());
|
|
|
|
// Create blocks for unstructured regions. This has to be done since
|
|
// blocks are initially allocated with the function as the parent region.
|
|
if (eval.lowerAsUnstructured()) {
|
|
Fortran::lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp,
|
|
mlir::omp::YieldOp>(
|
|
firOpBuilder, eval.getNestedEvaluations());
|
|
}
|
|
|
|
firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);
|
|
|
|
// Set the insertion point after the marker.
|
|
firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
|
|
if (genNested)
|
|
genNestedEvaluations(converter, eval);
|
|
}
|
|
|
|
template <typename OpTy, typename... Args>
|
|
static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) {
|
|
auto op = info.converter.getFirOpBuilder().create<OpTy>(
|
|
info.loc, std::forward<Args>(args)...);
|
|
createBodyOfOp<OpTy>(op, info);
|
|
return op;
|
|
}
|
|
|
|
static mlir::omp::MasterOp
|
|
genMasterOp(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::Location currentLocation) {
|
|
return genOpWithBody<mlir::omp::MasterOp>(
|
|
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
|
|
.setGenNested(genNested),
|
|
/*resultTypes=*/mlir::TypeRange());
|
|
}
|
|
|
|
static mlir::omp::OrderedRegionOp
|
|
genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::Location currentLocation) {
|
|
return genOpWithBody<mlir::omp::OrderedRegionOp>(
|
|
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
|
|
.setGenNested(genNested),
|
|
/*simd=*/false);
|
|
}
|
|
|
|
static mlir::omp::ParallelOp
|
|
genParallelOp(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::Location currentLocation,
|
|
const Fortran::parser::OmpClauseList &clauseList,
|
|
bool outerCombined = false) {
|
|
Fortran::lower::StatementContext stmtCtx;
|
|
mlir::Value ifClauseOperand, numThreadsClauseOperand;
|
|
mlir::omp::ClauseProcBindKindAttr procBindKindAttr;
|
|
llvm::SmallVector<mlir::Value> allocateOperands, allocatorOperands,
|
|
reductionVars;
|
|
llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
|
|
llvm::SmallVector<const Fortran::semantics::Symbol *> reductionSymbols;
|
|
|
|
ClauseProcessor cp(converter, semaCtx, clauseList);
|
|
cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Parallel,
|
|
ifClauseOperand);
|
|
cp.processNumThreads(stmtCtx, numThreadsClauseOperand);
|
|
cp.processProcBind(procBindKindAttr);
|
|
cp.processDefault();
|
|
cp.processAllocate(allocatorOperands, allocateOperands);
|
|
if (!outerCombined)
|
|
cp.processReduction(currentLocation, reductionVars, reductionDeclSymbols,
|
|
&reductionSymbols);
|
|
|
|
llvm::SmallVector<mlir::Type> reductionTypes;
|
|
reductionTypes.reserve(reductionVars.size());
|
|
llvm::transform(reductionVars, std::back_inserter(reductionTypes),
|
|
[](mlir::Value v) { return v.getType(); });
|
|
|
|
auto reductionCallback = [&](mlir::Operation *op) {
|
|
llvm::SmallVector<mlir::Location> locs(reductionVars.size(),
|
|
currentLocation);
|
|
auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {},
|
|
reductionTypes, locs);
|
|
for (auto [arg, prv] :
|
|
llvm::zip_equal(reductionSymbols, block->getArguments())) {
|
|
converter.bindSymbol(*arg, prv);
|
|
}
|
|
return reductionSymbols;
|
|
};
|
|
|
|
mlir::UnitAttr byrefAttr;
|
|
if (ReductionProcessor::doReductionByRef(reductionVars))
|
|
byrefAttr = converter.getFirOpBuilder().getUnitAttr();
|
|
|
|
OpWithBodyGenInfo genInfo =
|
|
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
|
|
.setGenNested(genNested)
|
|
.setOuterCombined(outerCombined)
|
|
.setClauses(&clauseList)
|
|
.setReductions(&reductionSymbols, &reductionTypes)
|
|
.setGenRegionEntryCb(reductionCallback);
|
|
|
|
if (!enableDelayedPrivatization) {
|
|
return genOpWithBody<mlir::omp::ParallelOp>(
|
|
genInfo,
|
|
/*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
|
|
numThreadsClauseOperand, allocateOperands, allocatorOperands,
|
|
reductionVars,
|
|
reductionDeclSymbols.empty()
|
|
? nullptr
|
|
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
|
|
reductionDeclSymbols),
|
|
procBindKindAttr, /*private_vars=*/llvm::SmallVector<mlir::Value>{},
|
|
/*privatizers=*/nullptr, byrefAttr);
|
|
}
|
|
|
|
bool privatize = !outerCombined;
|
|
DataSharingProcessor dsp(converter, clauseList, eval,
|
|
/*useDelayedPrivatization=*/true, &symTable);
|
|
|
|
if (privatize)
|
|
dsp.processStep1();
|
|
|
|
const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo();
|
|
|
|
auto genRegionEntryCB = [&](mlir::Operation *op) {
|
|
auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op);
|
|
|
|
llvm::SmallVector<mlir::Location> reductionLocs(reductionVars.size(),
|
|
currentLocation);
|
|
|
|
mlir::OperandRange privateVars = parallelOp.getPrivateVars();
|
|
mlir::Region ®ion = parallelOp.getRegion();
|
|
|
|
llvm::SmallVector<mlir::Type> privateVarTypes = reductionTypes;
|
|
privateVarTypes.reserve(privateVarTypes.size() + privateVars.size());
|
|
llvm::transform(privateVars, std::back_inserter(privateVarTypes),
|
|
[](mlir::Value v) { return v.getType(); });
|
|
|
|
llvm::SmallVector<mlir::Location> privateVarLocs = reductionLocs;
|
|
privateVarLocs.reserve(privateVarLocs.size() + privateVars.size());
|
|
llvm::transform(privateVars, std::back_inserter(privateVarLocs),
|
|
[](mlir::Value v) { return v.getLoc(); });
|
|
|
|
converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{},
|
|
privateVarTypes, privateVarLocs);
|
|
|
|
llvm::SmallVector<const Fortran::semantics::Symbol *> allSymbols =
|
|
reductionSymbols;
|
|
allSymbols.append(delayedPrivatizationInfo.symbols);
|
|
for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) {
|
|
converter.bindSymbol(*arg, prv);
|
|
}
|
|
|
|
return allSymbols;
|
|
};
|
|
|
|
// TODO Merge with the reduction CB.
|
|
genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp);
|
|
|
|
llvm::SmallVector<mlir::Attribute> privatizers(
|
|
delayedPrivatizationInfo.privatizers.begin(),
|
|
delayedPrivatizationInfo.privatizers.end());
|
|
|
|
return genOpWithBody<mlir::omp::ParallelOp>(
|
|
genInfo,
|
|
/*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
|
|
numThreadsClauseOperand, allocateOperands, allocatorOperands,
|
|
reductionVars,
|
|
reductionDeclSymbols.empty()
|
|
? nullptr
|
|
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
|
|
reductionDeclSymbols),
|
|
procBindKindAttr, delayedPrivatizationInfo.originalAddresses,
|
|
delayedPrivatizationInfo.privatizers.empty()
|
|
? nullptr
|
|
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
|
|
privatizers),
|
|
byrefAttr);
|
|
}
|
|
|
|
static mlir::omp::SectionOp
|
|
genSectionOp(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::Location currentLocation,
|
|
const Fortran::parser::OmpClauseList §ionsClauseList) {
|
|
// Currently only private/firstprivate clause is handled, and
|
|
// all privatization is done within `omp.section` operations.
|
|
return genOpWithBody<mlir::omp::SectionOp>(
|
|
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
|
|
.setGenNested(genNested)
|
|
.setClauses(§ionsClauseList));
|
|
}
|
|
|
|
static mlir::omp::SingleOp
|
|
genSingleOp(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::Location currentLocation,
|
|
const Fortran::parser::OmpClauseList &beginClauseList,
|
|
const Fortran::parser::OmpClauseList &endClauseList) {
|
|
llvm::SmallVector<mlir::Value> allocateOperands, allocatorOperands;
|
|
llvm::SmallVector<mlir::Value> copyPrivateVars;
|
|
llvm::SmallVector<mlir::Attribute> copyPrivateFuncs;
|
|
mlir::UnitAttr nowaitAttr;
|
|
|
|
ClauseProcessor cp(converter, semaCtx, beginClauseList);
|
|
cp.processAllocate(allocatorOperands, allocateOperands);
|
|
|
|
ClauseProcessor ecp(converter, semaCtx, endClauseList);
|
|
ecp.processNowait(nowaitAttr);
|
|
ecp.processCopyPrivate(currentLocation, copyPrivateVars, copyPrivateFuncs);
|
|
|
|
return genOpWithBody<mlir::omp::SingleOp>(
|
|
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
|
|
.setGenNested(genNested)
|
|
.setClauses(&beginClauseList),
|
|
allocateOperands, allocatorOperands, copyPrivateVars,
|
|
copyPrivateFuncs.empty()
|
|
? nullptr
|
|
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
|
|
copyPrivateFuncs),
|
|
nowaitAttr);
|
|
}
|
|
|
|
static mlir::omp::TaskOp
|
|
genTaskOp(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::Location currentLocation,
|
|
const Fortran::parser::OmpClauseList &clauseList) {
|
|
Fortran::lower::StatementContext stmtCtx;
|
|
mlir::Value ifClauseOperand, finalClauseOperand, priorityClauseOperand;
|
|
mlir::UnitAttr untiedAttr, mergeableAttr;
|
|
llvm::SmallVector<mlir::Attribute> dependTypeOperands;
|
|
llvm::SmallVector<mlir::Value> allocateOperands, allocatorOperands,
|
|
dependOperands;
|
|
|
|
ClauseProcessor cp(converter, semaCtx, clauseList);
|
|
cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Task,
|
|
ifClauseOperand);
|
|
cp.processAllocate(allocatorOperands, allocateOperands);
|
|
cp.processDefault();
|
|
cp.processFinal(stmtCtx, finalClauseOperand);
|
|
cp.processUntied(untiedAttr);
|
|
cp.processMergeable(mergeableAttr);
|
|
cp.processPriority(stmtCtx, priorityClauseOperand);
|
|
cp.processDepend(dependTypeOperands, dependOperands);
|
|
cp.processTODO<Fortran::parser::OmpClause::InReduction,
|
|
Fortran::parser::OmpClause::Detach,
|
|
Fortran::parser::OmpClause::Affinity>(
|
|
currentLocation, llvm::omp::Directive::OMPD_task);
|
|
|
|
return genOpWithBody<mlir::omp::TaskOp>(
|
|
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
|
|
.setGenNested(genNested)
|
|
.setClauses(&clauseList),
|
|
ifClauseOperand, finalClauseOperand, untiedAttr, mergeableAttr,
|
|
/*in_reduction_vars=*/mlir::ValueRange(),
|
|
/*in_reductions=*/nullptr, priorityClauseOperand,
|
|
dependTypeOperands.empty()
|
|
? nullptr
|
|
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
|
|
dependTypeOperands),
|
|
dependOperands, allocateOperands, allocatorOperands);
|
|
}
|
|
|
|
static mlir::omp::TaskGroupOp
|
|
genTaskGroupOp(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::Location currentLocation,
|
|
const Fortran::parser::OmpClauseList &clauseList) {
|
|
llvm::SmallVector<mlir::Value> allocateOperands, allocatorOperands;
|
|
ClauseProcessor cp(converter, semaCtx, clauseList);
|
|
cp.processAllocate(allocatorOperands, allocateOperands);
|
|
cp.processTODO<Fortran::parser::OmpClause::TaskReduction>(
|
|
currentLocation, llvm::omp::Directive::OMPD_taskgroup);
|
|
return genOpWithBody<mlir::omp::TaskGroupOp>(
|
|
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
|
|
.setGenNested(genNested)
|
|
.setClauses(&clauseList),
|
|
/*task_reduction_vars=*/mlir::ValueRange(),
|
|
/*task_reductions=*/nullptr, allocateOperands, allocatorOperands);
|
|
}
|
|
|
|
// This helper function implements the functionality of "promoting"
|
|
// non-CPTR arguments of use_device_ptr to use_device_addr
|
|
// arguments (automagic conversion of use_device_ptr ->
|
|
// use_device_addr in these cases). The way we do so currently is
|
|
// through the shuffling of operands from the devicePtrOperands to
|
|
// deviceAddrOperands where neccesary and re-organizing the types,
|
|
// locations and symbols to maintain the correct ordering of ptr/addr
|
|
// input -> BlockArg.
|
|
//
|
|
// This effectively implements some deprecated OpenMP functionality
|
|
// that some legacy applications unfortunately depend on
|
|
// (deprecated in specification version 5.2):
|
|
//
|
|
// "If a list item in a use_device_ptr clause is not of type C_PTR,
|
|
// the behavior is as if the list item appeared in a use_device_addr
|
|
// clause. Support for such list items in a use_device_ptr clause
|
|
// is deprecated."
|
|
static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
|
|
llvm::SmallVector<mlir::Value> &devicePtrOperands,
|
|
llvm::SmallVector<mlir::Value> &deviceAddrOperands,
|
|
llvm::SmallVector<mlir::Type> &useDeviceTypes,
|
|
llvm::SmallVector<mlir::Location> &useDeviceLocs,
|
|
llvm::SmallVector<const Fortran::semantics::Symbol *> &useDeviceSymbols) {
|
|
auto moveElementToBack = [](size_t idx, auto &vector) {
|
|
auto *iter = std::next(vector.begin(), idx);
|
|
vector.push_back(*iter);
|
|
vector.erase(iter);
|
|
};
|
|
|
|
// Iterate over our use_device_ptr list and shift all non-cptr arguments into
|
|
// use_device_addr.
|
|
for (auto *it = devicePtrOperands.begin(); it != devicePtrOperands.end();) {
|
|
if (!fir::isa_builtin_cptr_type(fir::unwrapRefType(it->getType()))) {
|
|
deviceAddrOperands.push_back(*it);
|
|
// We have to shuffle the symbols around as well, to maintain
|
|
// the correct Input -> BlockArg for use_device_ptr/use_device_addr.
|
|
// NOTE: However, as map's do not seem to be included currently
|
|
// this isn't as pertinent, but we must try to maintain for
|
|
// future alterations. I believe the reason they are not currently
|
|
// is that the BlockArg assign/lowering needs to be extended
|
|
// to a greater set of types.
|
|
auto idx = std::distance(devicePtrOperands.begin(), it);
|
|
moveElementToBack(idx, useDeviceTypes);
|
|
moveElementToBack(idx, useDeviceLocs);
|
|
moveElementToBack(idx, useDeviceSymbols);
|
|
it = devicePtrOperands.erase(it);
|
|
continue;
|
|
}
|
|
++it;
|
|
}
|
|
}
|
|
|
|
static mlir::omp::DataOp
|
|
genDataOp(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::Location currentLocation,
|
|
const Fortran::parser::OmpClauseList &clauseList) {
|
|
Fortran::lower::StatementContext stmtCtx;
|
|
mlir::Value ifClauseOperand, deviceOperand;
|
|
llvm::SmallVector<mlir::Value> mapOperands, devicePtrOperands,
|
|
deviceAddrOperands;
|
|
llvm::SmallVector<mlir::Type> useDeviceTypes;
|
|
llvm::SmallVector<mlir::Location> useDeviceLocs;
|
|
llvm::SmallVector<const Fortran::semantics::Symbol *> useDeviceSymbols;
|
|
|
|
ClauseProcessor cp(converter, semaCtx, clauseList);
|
|
cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetData,
|
|
ifClauseOperand);
|
|
cp.processDevice(stmtCtx, deviceOperand);
|
|
cp.processUseDevicePtr(devicePtrOperands, useDeviceTypes, useDeviceLocs,
|
|
useDeviceSymbols);
|
|
cp.processUseDeviceAddr(deviceAddrOperands, useDeviceTypes, useDeviceLocs,
|
|
useDeviceSymbols);
|
|
// This function implements the deprecated functionality of use_device_ptr
|
|
// that allows users to provide non-CPTR arguments to it with the caveat
|
|
// that the compiler will treat them as use_device_addr. A lot of legacy
|
|
// code may still depend on this functionality, so we should support it
|
|
// in some manner. We do so currently by simply shifting non-cptr operands
|
|
// from the use_device_ptr list into the front of the use_device_addr list
|
|
// whilst maintaining the ordering of useDeviceLocs, useDeviceSymbols and
|
|
// useDeviceTypes to use_device_ptr/use_device_addr input for BlockArg
|
|
// ordering.
|
|
// TODO: Perhaps create a user provideable compiler option that will
|
|
// re-introduce a hard-error rather than a warning in these cases.
|
|
promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
|
|
devicePtrOperands, deviceAddrOperands, useDeviceTypes, useDeviceLocs,
|
|
useDeviceSymbols);
|
|
cp.processMap(currentLocation, llvm::omp::Directive::OMPD_target_data,
|
|
stmtCtx, mapOperands);
|
|
|
|
auto dataOp = converter.getFirOpBuilder().create<mlir::omp::DataOp>(
|
|
currentLocation, ifClauseOperand, deviceOperand, devicePtrOperands,
|
|
deviceAddrOperands, mapOperands);
|
|
genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, dataOp,
|
|
useDeviceTypes, useDeviceLocs, useDeviceSymbols,
|
|
currentLocation);
|
|
return dataOp;
|
|
}
|
|
|
|
template <typename OpTy>
|
|
static OpTy
|
|
genEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
mlir::Location currentLocation,
|
|
const Fortran::parser::OmpClauseList &clauseList) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
Fortran::lower::StatementContext stmtCtx;
|
|
mlir::Value ifClauseOperand, deviceOperand;
|
|
mlir::UnitAttr nowaitAttr;
|
|
llvm::SmallVector<mlir::Value> mapOperands, dependOperands;
|
|
llvm::SmallVector<mlir::Attribute> dependTypeOperands;
|
|
|
|
Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName;
|
|
// GCC 9.3.0 emits a (probably) bogus warning about an unused variable.
|
|
[[maybe_unused]] llvm::omp::Directive directive;
|
|
if constexpr (std::is_same_v<OpTy, mlir::omp::EnterDataOp>) {
|
|
directiveName =
|
|
Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetEnterData;
|
|
directive = llvm::omp::Directive::OMPD_target_enter_data;
|
|
} else if constexpr (std::is_same_v<OpTy, mlir::omp::ExitDataOp>) {
|
|
directiveName =
|
|
Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetExitData;
|
|
directive = llvm::omp::Directive::OMPD_target_exit_data;
|
|
} else if constexpr (std::is_same_v<OpTy, mlir::omp::UpdateDataOp>) {
|
|
directiveName =
|
|
Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate;
|
|
directive = llvm::omp::Directive::OMPD_target_update;
|
|
} else {
|
|
return nullptr;
|
|
}
|
|
|
|
ClauseProcessor cp(converter, semaCtx, clauseList);
|
|
cp.processIf(directiveName, ifClauseOperand);
|
|
cp.processDevice(stmtCtx, deviceOperand);
|
|
cp.processDepend(dependTypeOperands, dependOperands);
|
|
cp.processNowait(nowaitAttr);
|
|
|
|
if constexpr (std::is_same_v<OpTy, mlir::omp::UpdateDataOp>) {
|
|
cp.processMotionClauses<Fortran::parser::OmpClause::To>(stmtCtx,
|
|
mapOperands);
|
|
cp.processMotionClauses<Fortran::parser::OmpClause::From>(stmtCtx,
|
|
mapOperands);
|
|
|
|
} else {
|
|
cp.processMap(currentLocation, directive, stmtCtx, mapOperands);
|
|
}
|
|
|
|
return firOpBuilder.create<OpTy>(
|
|
currentLocation, ifClauseOperand, deviceOperand,
|
|
dependTypeOperands.empty()
|
|
? nullptr
|
|
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
|
|
dependTypeOperands),
|
|
dependOperands, nowaitAttr, mapOperands);
|
|
}
|
|
|
|
// This functions creates a block for the body of the targetOp's region. It adds
|
|
// all the symbols present in mapSymbols as block arguments to this block.
|
|
static void genBodyOfTargetOp(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::omp::TargetOp &targetOp,
|
|
const llvm::SmallVector<mlir::Type> &mapSymTypes,
|
|
const llvm::SmallVector<mlir::Location> &mapSymLocs,
|
|
const llvm::SmallVector<const Fortran::semantics::Symbol *> &mapSymbols,
|
|
const mlir::Location ¤tLocation) {
|
|
assert(mapSymTypes.size() == mapSymLocs.size());
|
|
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
mlir::Region ®ion = targetOp.getRegion();
|
|
|
|
auto *regionBlock =
|
|
firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs);
|
|
|
|
// Clones the `bounds` placing them inside the target region and returns them.
|
|
auto cloneBound = [&](mlir::Value bound) {
|
|
if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
|
|
mlir::Operation *clonedOp = bound.getDefiningOp()->clone();
|
|
regionBlock->push_back(clonedOp);
|
|
return clonedOp->getResult(0);
|
|
}
|
|
TODO(converter.getCurrentLocation(),
|
|
"target map clause operand unsupported bound type");
|
|
};
|
|
|
|
auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) {
|
|
llvm::SmallVector<mlir::Value> clonedBounds;
|
|
for (mlir::Value bound : bounds)
|
|
clonedBounds.emplace_back(cloneBound(bound));
|
|
return clonedBounds;
|
|
};
|
|
|
|
// Bind the symbols to their corresponding block arguments.
|
|
for (auto [argIndex, argSymbol] : llvm::enumerate(mapSymbols)) {
|
|
const mlir::BlockArgument &arg = region.getArgument(argIndex);
|
|
// Avoid capture of a reference to a structured binding.
|
|
const Fortran::semantics::Symbol *sym = argSymbol;
|
|
// Structure component symbols don't have bindings.
|
|
if (sym->owner().IsDerivedType())
|
|
continue;
|
|
fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
|
|
extVal.match(
|
|
[&](const fir::BoxValue &v) {
|
|
converter.bindSymbol(*sym,
|
|
fir::BoxValue(arg, cloneBounds(v.getLBounds()),
|
|
v.getExplicitParameters(),
|
|
v.getExplicitExtents()));
|
|
},
|
|
[&](const fir::MutableBoxValue &v) {
|
|
converter.bindSymbol(
|
|
*sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()),
|
|
v.getMutableProperties()));
|
|
},
|
|
[&](const fir::ArrayBoxValue &v) {
|
|
converter.bindSymbol(
|
|
*sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()),
|
|
cloneBounds(v.getLBounds()),
|
|
v.getSourceBox()));
|
|
},
|
|
[&](const fir::CharArrayBoxValue &v) {
|
|
converter.bindSymbol(
|
|
*sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()),
|
|
cloneBounds(v.getExtents()),
|
|
cloneBounds(v.getLBounds())));
|
|
},
|
|
[&](const fir::CharBoxValue &v) {
|
|
converter.bindSymbol(*sym,
|
|
fir::CharBoxValue(arg, cloneBound(v.getLen())));
|
|
},
|
|
[&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); },
|
|
[&](const auto &) {
|
|
TODO(converter.getCurrentLocation(),
|
|
"target map clause operand unsupported type");
|
|
});
|
|
}
|
|
|
|
// Check if cloning the bounds introduced any dependency on the outer region.
|
|
// If so, then either clone them as well if they are MemoryEffectFree, or else
|
|
// copy them to a new temporary and add them to the map and block_argument
|
|
// lists and replace their uses with the new temporary.
|
|
llvm::SetVector<mlir::Value> valuesDefinedAbove;
|
|
mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
|
|
while (!valuesDefinedAbove.empty()) {
|
|
for (mlir::Value val : valuesDefinedAbove) {
|
|
mlir::Operation *valOp = val.getDefiningOp();
|
|
if (mlir::isMemoryEffectFree(valOp)) {
|
|
mlir::Operation *clonedOp = valOp->clone();
|
|
regionBlock->push_front(clonedOp);
|
|
val.replaceUsesWithIf(
|
|
clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) {
|
|
return use.getOwner()->getBlock() == regionBlock;
|
|
});
|
|
} else {
|
|
auto savedIP = firOpBuilder.getInsertionPoint();
|
|
firOpBuilder.setInsertionPointAfter(valOp);
|
|
auto copyVal =
|
|
firOpBuilder.createTemporary(val.getLoc(), val.getType());
|
|
firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
|
|
|
|
llvm::SmallVector<mlir::Value> bounds;
|
|
std::stringstream name;
|
|
firOpBuilder.setInsertionPoint(targetOp);
|
|
mlir::Value mapOp = createMapInfoOp(
|
|
firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(),
|
|
bounds, llvm::SmallVector<mlir::Value>{},
|
|
static_cast<
|
|
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
|
|
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
|
|
mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType());
|
|
targetOp.getMapOperandsMutable().append(mapOp);
|
|
mlir::Value clonedValArg =
|
|
region.addArgument(copyVal.getType(), copyVal.getLoc());
|
|
firOpBuilder.setInsertionPointToStart(regionBlock);
|
|
auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(),
|
|
clonedValArg);
|
|
val.replaceUsesWithIf(
|
|
loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) {
|
|
return use.getOwner()->getBlock() == regionBlock;
|
|
});
|
|
firOpBuilder.setInsertionPoint(regionBlock, savedIP);
|
|
}
|
|
}
|
|
valuesDefinedAbove.clear();
|
|
mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
|
|
}
|
|
|
|
// Insert dummy instruction to remember the insertion position. The
|
|
// marker will be deleted since there are not uses.
|
|
// In the HLFIR flow there are hlfir.declares inserted above while
|
|
// setting block arguments.
|
|
mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
|
|
targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType());
|
|
|
|
// Create blocks for unstructured regions. This has to be done since
|
|
// blocks are initially allocated with the function as the parent region.
|
|
if (eval.lowerAsUnstructured()) {
|
|
Fortran::lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp,
|
|
mlir::omp::YieldOp>(
|
|
firOpBuilder, eval.getNestedEvaluations());
|
|
}
|
|
|
|
firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);
|
|
|
|
// Create the insertion point after the marker.
|
|
firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
|
|
if (genNested)
|
|
genNestedEvaluations(converter, eval);
|
|
}
|
|
|
|
static mlir::omp::TargetOp
|
|
genTargetOp(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::Location currentLocation,
|
|
const Fortran::parser::OmpClauseList &clauseList,
|
|
llvm::omp::Directive directive, bool outerCombined = false) {
|
|
Fortran::lower::StatementContext stmtCtx;
|
|
mlir::Value ifClauseOperand, deviceOperand, threadLimitOperand;
|
|
mlir::UnitAttr nowaitAttr;
|
|
llvm::SmallVector<mlir::Attribute> dependTypeOperands;
|
|
llvm::SmallVector<mlir::Value> mapOperands, dependOperands;
|
|
llvm::SmallVector<mlir::Type> mapSymTypes;
|
|
llvm::SmallVector<mlir::Location> mapSymLocs;
|
|
llvm::SmallVector<const Fortran::semantics::Symbol *> mapSymbols;
|
|
|
|
ClauseProcessor cp(converter, semaCtx, clauseList);
|
|
cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Target,
|
|
ifClauseOperand);
|
|
cp.processDevice(stmtCtx, deviceOperand);
|
|
cp.processThreadLimit(stmtCtx, threadLimitOperand);
|
|
cp.processDepend(dependTypeOperands, dependOperands);
|
|
cp.processNowait(nowaitAttr);
|
|
cp.processMap(currentLocation, directive, stmtCtx, mapOperands, &mapSymTypes,
|
|
&mapSymLocs, &mapSymbols);
|
|
|
|
cp.processTODO<Fortran::parser::OmpClause::Private,
|
|
Fortran::parser::OmpClause::Firstprivate,
|
|
Fortran::parser::OmpClause::IsDevicePtr,
|
|
Fortran::parser::OmpClause::HasDeviceAddr,
|
|
Fortran::parser::OmpClause::Reduction,
|
|
Fortran::parser::OmpClause::InReduction,
|
|
Fortran::parser::OmpClause::Allocate,
|
|
Fortran::parser::OmpClause::UsesAllocators,
|
|
Fortran::parser::OmpClause::Defaultmap>(
|
|
currentLocation, llvm::omp::Directive::OMPD_target);
|
|
// 5.8.1 Implicit Data-Mapping Attribute Rules
|
|
// The following code follows the implicit data-mapping rules to map all the
|
|
// symbols used inside the region that have not been explicitly mapped using
|
|
// the map clause.
|
|
auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) {
|
|
if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) {
|
|
mlir::Value baseOp = converter.getSymbolAddress(sym);
|
|
if (!baseOp)
|
|
if (const auto *details = sym.template detailsIf<
|
|
Fortran::semantics::HostAssocDetails>()) {
|
|
baseOp = converter.getSymbolAddress(details->symbol());
|
|
converter.copySymbolBinding(details->symbol(), sym);
|
|
}
|
|
|
|
if (baseOp) {
|
|
llvm::SmallVector<mlir::Value> bounds;
|
|
std::stringstream name;
|
|
fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym);
|
|
name << sym.name().ToString();
|
|
|
|
Fortran::lower::AddrAndBoundsInfo info =
|
|
getDataOperandBaseAddr(converter, converter.getFirOpBuilder(), sym,
|
|
converter.getCurrentLocation());
|
|
if (fir::unwrapRefType(info.addr.getType()).isa<fir::BaseBoxType>())
|
|
bounds =
|
|
Fortran::lower::genBoundsOpsFromBox<mlir::omp::DataBoundsOp,
|
|
mlir::omp::DataBoundsType>(
|
|
converter.getFirOpBuilder(), converter.getCurrentLocation(),
|
|
converter, dataExv, info);
|
|
if (fir::unwrapRefType(info.addr.getType()).isa<fir::SequenceType>()) {
|
|
bool dataExvIsAssumedSize =
|
|
Fortran::semantics::IsAssumedSizeArray(sym.GetUltimate());
|
|
bounds = Fortran::lower::genBaseBoundsOps<mlir::omp::DataBoundsOp,
|
|
mlir::omp::DataBoundsType>(
|
|
converter.getFirOpBuilder(), converter.getCurrentLocation(),
|
|
converter, dataExv, dataExvIsAssumedSize);
|
|
}
|
|
|
|
llvm::omp::OpenMPOffloadMappingFlags mapFlag =
|
|
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
|
|
mlir::omp::VariableCaptureKind captureKind =
|
|
mlir::omp::VariableCaptureKind::ByRef;
|
|
|
|
mlir::Type eleType = baseOp.getType();
|
|
if (auto refType = baseOp.getType().dyn_cast<fir::ReferenceType>())
|
|
eleType = refType.getElementType();
|
|
|
|
// If a variable is specified in declare target link and if device
|
|
// type is not specified as `nohost`, it needs to be mapped tofrom
|
|
mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
|
|
mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym));
|
|
auto declareTargetOp =
|
|
llvm::dyn_cast_if_present<mlir::omp::DeclareTargetInterface>(op);
|
|
if (declareTargetOp && declareTargetOp.isDeclareTarget()) {
|
|
if (declareTargetOp.getDeclareTargetCaptureClause() ==
|
|
mlir::omp::DeclareTargetCaptureClause::link &&
|
|
declareTargetOp.getDeclareTargetDeviceType() !=
|
|
mlir::omp::DeclareTargetDeviceType::nohost) {
|
|
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
|
|
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
|
|
}
|
|
} else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
|
|
captureKind = mlir::omp::VariableCaptureKind::ByCopy;
|
|
} else if (!fir::isa_builtin_cptr_type(eleType)) {
|
|
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
|
|
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
|
|
}
|
|
|
|
mlir::Value mapOp = createMapInfoOp(
|
|
converter.getFirOpBuilder(), baseOp.getLoc(), baseOp, mlir::Value{},
|
|
name.str(), bounds, {},
|
|
static_cast<
|
|
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
|
|
mapFlag),
|
|
captureKind, baseOp.getType());
|
|
|
|
mapOperands.push_back(mapOp);
|
|
mapSymTypes.push_back(baseOp.getType());
|
|
mapSymLocs.push_back(baseOp.getLoc());
|
|
mapSymbols.push_back(&sym);
|
|
}
|
|
}
|
|
};
|
|
Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap);
|
|
|
|
auto targetOp = converter.getFirOpBuilder().create<mlir::omp::TargetOp>(
|
|
currentLocation, ifClauseOperand, deviceOperand, threadLimitOperand,
|
|
dependTypeOperands.empty()
|
|
? nullptr
|
|
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
|
|
dependTypeOperands),
|
|
dependOperands, nowaitAttr, mapOperands);
|
|
|
|
genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSymTypes,
|
|
mapSymLocs, mapSymbols, currentLocation);
|
|
|
|
return targetOp;
|
|
}
|
|
|
|
static mlir::omp::TeamsOp
|
|
genTeamsOp(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
mlir::Location currentLocation,
|
|
const Fortran::parser::OmpClauseList &clauseList,
|
|
bool outerCombined = false) {
|
|
Fortran::lower::StatementContext stmtCtx;
|
|
mlir::Value numTeamsClauseOperand, ifClauseOperand, threadLimitClauseOperand;
|
|
llvm::SmallVector<mlir::Value> allocateOperands, allocatorOperands,
|
|
reductionVars;
|
|
llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
|
|
|
|
ClauseProcessor cp(converter, semaCtx, clauseList);
|
|
cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Teams,
|
|
ifClauseOperand);
|
|
cp.processAllocate(allocatorOperands, allocateOperands);
|
|
cp.processDefault();
|
|
cp.processNumTeams(stmtCtx, numTeamsClauseOperand);
|
|
cp.processThreadLimit(stmtCtx, threadLimitClauseOperand);
|
|
cp.processTODO<Fortran::parser::OmpClause::Reduction>(
|
|
currentLocation, llvm::omp::Directive::OMPD_teams);
|
|
|
|
return genOpWithBody<mlir::omp::TeamsOp>(
|
|
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
|
|
.setGenNested(genNested)
|
|
.setOuterCombined(outerCombined)
|
|
.setClauses(&clauseList),
|
|
/*num_teams_lower=*/nullptr, numTeamsClauseOperand, ifClauseOperand,
|
|
threadLimitClauseOperand, allocateOperands, allocatorOperands,
|
|
reductionVars,
|
|
reductionDeclSymbols.empty()
|
|
? nullptr
|
|
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
|
|
reductionDeclSymbols));
|
|
}
|
|
|
|
/// Extract the list of function and variable symbols affected by the given
|
|
/// 'declare target' directive and return the intended device type for them.
|
|
static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
|
|
llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
|
|
|
|
// The default capture type
|
|
mlir::omp::DeclareTargetDeviceType deviceType =
|
|
mlir::omp::DeclareTargetDeviceType::any;
|
|
const auto &spec = std::get<Fortran::parser::OmpDeclareTargetSpecifier>(
|
|
declareTargetConstruct.t);
|
|
|
|
if (const auto *objectList{
|
|
Fortran::parser::Unwrap<Fortran::parser::OmpObjectList>(spec.u)}) {
|
|
// Case: declare target(func, var1, var2)
|
|
gatherFuncAndVarSyms(*objectList, mlir::omp::DeclareTargetCaptureClause::to,
|
|
symbolAndClause);
|
|
} else if (const auto *clauseList{
|
|
Fortran::parser::Unwrap<Fortran::parser::OmpClauseList>(
|
|
spec.u)}) {
|
|
if (clauseList->v.empty()) {
|
|
// Case: declare target, implicit capture of function
|
|
symbolAndClause.emplace_back(
|
|
mlir::omp::DeclareTargetCaptureClause::to,
|
|
eval.getOwningProcedure()->getSubprogramSymbol());
|
|
}
|
|
|
|
ClauseProcessor cp(converter, semaCtx, *clauseList);
|
|
cp.processTo(symbolAndClause);
|
|
cp.processEnter(symbolAndClause);
|
|
cp.processLink(symbolAndClause);
|
|
cp.processDeviceType(deviceType);
|
|
cp.processTODO<Fortran::parser::OmpClause::Indirect>(
|
|
converter.getCurrentLocation(),
|
|
llvm::omp::Directive::OMPD_declare_target);
|
|
}
|
|
|
|
return deviceType;
|
|
}
|
|
|
|
static void collectDeferredDeclareTargets(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
|
|
llvm::SmallVectorImpl<Fortran::lower::OMPDeferredDeclareTargetInfo>
|
|
&deferredDeclareTarget) {
|
|
llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
|
|
mlir::omp::DeclareTargetDeviceType devType = getDeclareTargetInfo(
|
|
converter, semaCtx, eval, declareTargetConstruct, symbolAndClause);
|
|
// Return the device type only if at least one of the targets for the
|
|
// directive is a function or subroutine
|
|
mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
|
|
|
|
for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
|
|
mlir::Operation *op = mod.lookupSymbol(converter.mangleName(
|
|
std::get<const Fortran::semantics::Symbol &>(symClause)));
|
|
|
|
if (!op) {
|
|
deferredDeclareTarget.push_back(
|
|
{std::get<0>(symClause), devType, std::get<1>(symClause)});
|
|
}
|
|
}
|
|
}
|
|
|
|
static std::optional<mlir::omp::DeclareTargetDeviceType>
|
|
getDeclareTargetFunctionDevice(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPDeclareTargetConstruct
|
|
&declareTargetConstruct) {
|
|
llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
|
|
mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo(
|
|
converter, semaCtx, eval, declareTargetConstruct, symbolAndClause);
|
|
|
|
// Return the device type only if at least one of the targets for the
|
|
// directive is a function or subroutine
|
|
mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
|
|
for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
|
|
mlir::Operation *op = mod.lookupSymbol(converter.mangleName(
|
|
std::get<const Fortran::semantics::Symbol &>(symClause)));
|
|
|
|
if (mlir::isa_and_nonnull<mlir::func::FuncOp>(op))
|
|
return deviceType;
|
|
}
|
|
|
|
return std::nullopt;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// genOMP() Code generation helper functions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
static void
|
|
genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, bool genNested,
|
|
const Fortran::parser::OpenMPSimpleStandaloneConstruct
|
|
&simpleStandaloneConstruct) {
|
|
const auto &directive =
|
|
std::get<Fortran::parser::OmpSimpleStandaloneDirective>(
|
|
simpleStandaloneConstruct.t);
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
const auto &opClauseList =
|
|
std::get<Fortran::parser::OmpClauseList>(simpleStandaloneConstruct.t);
|
|
mlir::Location currentLocation = converter.genLocation(directive.source);
|
|
|
|
switch (directive.v) {
|
|
default:
|
|
break;
|
|
case llvm::omp::Directive::OMPD_barrier:
|
|
firOpBuilder.create<mlir::omp::BarrierOp>(currentLocation);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_taskwait:
|
|
ClauseProcessor(converter, semaCtx, opClauseList)
|
|
.processTODO<Fortran::parser::OmpClause::Depend,
|
|
Fortran::parser::OmpClause::Nowait>(
|
|
currentLocation, llvm::omp::Directive::OMPD_taskwait);
|
|
firOpBuilder.create<mlir::omp::TaskwaitOp>(currentLocation);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_taskyield:
|
|
firOpBuilder.create<mlir::omp::TaskyieldOp>(currentLocation);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_target_data:
|
|
genDataOp(converter, semaCtx, eval, genNested, currentLocation,
|
|
opClauseList);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_target_enter_data:
|
|
genEnterExitUpdateDataOp<mlir::omp::EnterDataOp>(
|
|
converter, semaCtx, currentLocation, opClauseList);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_target_exit_data:
|
|
genEnterExitUpdateDataOp<mlir::omp::ExitDataOp>(
|
|
converter, semaCtx, currentLocation, opClauseList);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_target_update:
|
|
genEnterExitUpdateDataOp<mlir::omp::UpdateDataOp>(
|
|
converter, semaCtx, currentLocation, opClauseList);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_ordered:
|
|
TODO(currentLocation, "OMPD_ordered");
|
|
}
|
|
}
|
|
|
|
static void
|
|
genOmpFlush(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPFlushConstruct &flushConstruct) {
|
|
llvm::SmallVector<mlir::Value, 4> operandRange;
|
|
if (const auto &ompObjectList =
|
|
std::get<std::optional<Fortran::parser::OmpObjectList>>(
|
|
flushConstruct.t))
|
|
genObjectList(*ompObjectList, converter, operandRange);
|
|
const auto &memOrderClause =
|
|
std::get<std::optional<std::list<Fortran::parser::OmpMemoryOrderClause>>>(
|
|
flushConstruct.t);
|
|
if (memOrderClause && memOrderClause->size() > 0)
|
|
TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause");
|
|
converter.getFirOpBuilder().create<mlir::omp::FlushOp>(
|
|
converter.getCurrentLocation(), operandRange);
|
|
}
|
|
|
|
static void
|
|
genOMP(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPStandaloneConstruct &standaloneConstruct) {
|
|
std::visit(
|
|
Fortran::common::visitors{
|
|
[&](const Fortran::parser::OpenMPSimpleStandaloneConstruct
|
|
&simpleStandaloneConstruct) {
|
|
genOmpSimpleStandalone(converter, semaCtx, eval,
|
|
/*genNested=*/true,
|
|
simpleStandaloneConstruct);
|
|
},
|
|
[&](const Fortran::parser::OpenMPFlushConstruct &flushConstruct) {
|
|
genOmpFlush(converter, semaCtx, eval, flushConstruct);
|
|
},
|
|
[&](const Fortran::parser::OpenMPCancelConstruct &cancelConstruct) {
|
|
TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct");
|
|
},
|
|
[&](const Fortran::parser::OpenMPCancellationPointConstruct
|
|
&cancellationPointConstruct) {
|
|
TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct");
|
|
},
|
|
},
|
|
standaloneConstruct.u);
|
|
}
|
|
|
|
static void convertLoopBounds(Fortran::lower::AbstractConverter &converter,
|
|
mlir::Location loc,
|
|
llvm::SmallVectorImpl<mlir::Value> &lowerBound,
|
|
llvm::SmallVectorImpl<mlir::Value> &upperBound,
|
|
llvm::SmallVectorImpl<mlir::Value> &step,
|
|
std::size_t loopVarTypeSize) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
// The types of lower bound, upper bound, and step are converted into the
|
|
// type of the loop variable if necessary.
|
|
mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
|
|
for (unsigned it = 0; it < (unsigned)lowerBound.size(); it++) {
|
|
lowerBound[it] =
|
|
firOpBuilder.createConvert(loc, loopVarType, lowerBound[it]);
|
|
upperBound[it] =
|
|
firOpBuilder.createConvert(loc, loopVarType, upperBound[it]);
|
|
step[it] = firOpBuilder.createConvert(loc, loopVarType, step[it]);
|
|
}
|
|
}
|
|
|
|
static llvm::SmallVector<const Fortran::semantics::Symbol *>
|
|
genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter,
|
|
mlir::Location &loc,
|
|
const llvm::SmallVector<const Fortran::semantics::Symbol *> &args) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
auto ®ion = op->getRegion(0);
|
|
|
|
std::size_t loopVarTypeSize = 0;
|
|
for (const Fortran::semantics::Symbol *arg : args)
|
|
loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
|
|
mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
|
|
llvm::SmallVector<mlir::Type> tiv(args.size(), loopVarType);
|
|
llvm::SmallVector<mlir::Location> locs(args.size(), loc);
|
|
firOpBuilder.createBlock(®ion, {}, tiv, locs);
|
|
// The argument is not currently in memory, so make a temporary for the
|
|
// argument, and store it there, then bind that location to the argument.
|
|
mlir::Operation *storeOp = nullptr;
|
|
for (auto [argIndex, argSymbol] : llvm::enumerate(args)) {
|
|
mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex));
|
|
storeOp =
|
|
createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
|
|
}
|
|
firOpBuilder.setInsertionPointAfter(storeOp);
|
|
|
|
return args;
|
|
}
|
|
|
|
static llvm::SmallVector<const Fortran::semantics::Symbol *>
|
|
genLoopAndReductionVars(
|
|
mlir::Operation *op, Fortran::lower::AbstractConverter &converter,
|
|
mlir::Location &loc,
|
|
const llvm::SmallVector<const Fortran::semantics::Symbol *> &loopArgs,
|
|
const llvm::SmallVector<const Fortran::semantics::Symbol *> &reductionArgs,
|
|
llvm::SmallVector<mlir::Type> &reductionTypes) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
|
|
llvm::SmallVector<mlir::Type> blockArgTypes;
|
|
llvm::SmallVector<mlir::Location> blockArgLocs;
|
|
blockArgTypes.reserve(loopArgs.size() + reductionArgs.size());
|
|
blockArgLocs.reserve(blockArgTypes.size());
|
|
mlir::Block *entryBlock;
|
|
|
|
if (loopArgs.size()) {
|
|
std::size_t loopVarTypeSize = 0;
|
|
for (const Fortran::semantics::Symbol *arg : loopArgs)
|
|
loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
|
|
mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
|
|
std::fill_n(std::back_inserter(blockArgTypes), loopArgs.size(),
|
|
loopVarType);
|
|
std::fill_n(std::back_inserter(blockArgLocs), loopArgs.size(), loc);
|
|
}
|
|
if (reductionArgs.size()) {
|
|
llvm::copy(reductionTypes, std::back_inserter(blockArgTypes));
|
|
std::fill_n(std::back_inserter(blockArgLocs), reductionArgs.size(), loc);
|
|
}
|
|
entryBlock = firOpBuilder.createBlock(&op->getRegion(0), {}, blockArgTypes,
|
|
blockArgLocs);
|
|
// The argument is not currently in memory, so make a temporary for the
|
|
// argument, and store it there, then bind that location to the argument.
|
|
if (loopArgs.size()) {
|
|
mlir::Operation *storeOp = nullptr;
|
|
for (auto [argIndex, argSymbol] : llvm::enumerate(loopArgs)) {
|
|
mlir::Value indexVal =
|
|
fir::getBase(op->getRegion(0).front().getArgument(argIndex));
|
|
storeOp =
|
|
createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
|
|
}
|
|
firOpBuilder.setInsertionPointAfter(storeOp);
|
|
}
|
|
// Bind the reduction arguments to their block arguments
|
|
for (auto [arg, prv] : llvm::zip_equal(
|
|
reductionArgs,
|
|
llvm::drop_begin(entryBlock->getArguments(), loopArgs.size()))) {
|
|
converter.bindSymbol(*arg, prv);
|
|
}
|
|
|
|
return loopArgs;
|
|
}
|
|
|
|
static void
|
|
createSimdLoop(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
llvm::omp::Directive ompDirective,
|
|
const Fortran::parser::OmpClauseList &loopOpClauseList,
|
|
mlir::Location loc) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
DataSharingProcessor dsp(converter, loopOpClauseList, eval);
|
|
dsp.processStep1();
|
|
|
|
Fortran::lower::StatementContext stmtCtx;
|
|
mlir::Value scheduleChunkClauseOperand, ifClauseOperand;
|
|
llvm::SmallVector<mlir::Value> lowerBound, upperBound, step, reductionVars;
|
|
llvm::SmallVector<mlir::Value> alignedVars, nontemporalVars;
|
|
llvm::SmallVector<const Fortran::semantics::Symbol *> iv;
|
|
llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
|
|
mlir::omp::ClauseOrderKindAttr orderClauseOperand;
|
|
mlir::IntegerAttr simdlenClauseOperand, safelenClauseOperand;
|
|
std::size_t loopVarTypeSize;
|
|
|
|
ClauseProcessor cp(converter, semaCtx, loopOpClauseList);
|
|
cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv,
|
|
loopVarTypeSize);
|
|
cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand);
|
|
cp.processReduction(loc, reductionVars, reductionDeclSymbols);
|
|
cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Simd,
|
|
ifClauseOperand);
|
|
cp.processSimdlen(simdlenClauseOperand);
|
|
cp.processSafelen(safelenClauseOperand);
|
|
cp.processTODO<Fortran::parser::OmpClause::Aligned,
|
|
Fortran::parser::OmpClause::Allocate,
|
|
Fortran::parser::OmpClause::Linear,
|
|
Fortran::parser::OmpClause::Nontemporal,
|
|
Fortran::parser::OmpClause::Order>(loc, ompDirective);
|
|
|
|
convertLoopBounds(converter, loc, lowerBound, upperBound, step,
|
|
loopVarTypeSize);
|
|
|
|
mlir::TypeRange resultType;
|
|
auto simdLoopOp = firOpBuilder.create<mlir::omp::SimdLoopOp>(
|
|
loc, resultType, lowerBound, upperBound, step, alignedVars,
|
|
/*alignment_values=*/nullptr, ifClauseOperand, nontemporalVars,
|
|
orderClauseOperand, simdlenClauseOperand, safelenClauseOperand,
|
|
/*inclusive=*/firOpBuilder.getUnitAttr());
|
|
|
|
auto *nestedEval = getCollapsedLoopEval(
|
|
eval, Fortran::lower::getCollapseValue(loopOpClauseList));
|
|
|
|
auto ivCallback = [&](mlir::Operation *op) {
|
|
return genLoopVars(op, converter, loc, iv);
|
|
};
|
|
|
|
createBodyOfOp<mlir::omp::SimdLoopOp>(
|
|
simdLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval)
|
|
.setClauses(&loopOpClauseList)
|
|
.setDataSharingProcessor(&dsp)
|
|
.setGenRegionEntryCb(ivCallback));
|
|
}
|
|
|
|
static void createWsLoop(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
llvm::omp::Directive ompDirective,
|
|
const Fortran::parser::OmpClauseList &beginClauseList,
|
|
const Fortran::parser::OmpClauseList *endClauseList,
|
|
mlir::Location loc) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
DataSharingProcessor dsp(converter, beginClauseList, eval);
|
|
dsp.processStep1();
|
|
|
|
Fortran::lower::StatementContext stmtCtx;
|
|
mlir::Value scheduleChunkClauseOperand;
|
|
llvm::SmallVector<mlir::Value> lowerBound, upperBound, step, reductionVars;
|
|
llvm::SmallVector<mlir::Value> linearVars, linearStepVars;
|
|
llvm::SmallVector<const Fortran::semantics::Symbol *> iv;
|
|
llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
|
|
llvm::SmallVector<const Fortran::semantics::Symbol *> reductionSymbols;
|
|
mlir::omp::ClauseOrderKindAttr orderClauseOperand;
|
|
mlir::omp::ClauseScheduleKindAttr scheduleValClauseOperand;
|
|
mlir::UnitAttr nowaitClauseOperand, byrefOperand, scheduleSimdClauseOperand;
|
|
mlir::IntegerAttr orderedClauseOperand;
|
|
mlir::omp::ScheduleModifierAttr scheduleModClauseOperand;
|
|
std::size_t loopVarTypeSize;
|
|
|
|
ClauseProcessor cp(converter, semaCtx, beginClauseList);
|
|
cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv,
|
|
loopVarTypeSize);
|
|
cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand);
|
|
cp.processReduction(loc, reductionVars, reductionDeclSymbols,
|
|
&reductionSymbols);
|
|
cp.processTODO<Fortran::parser::OmpClause::Linear,
|
|
Fortran::parser::OmpClause::Order>(loc, ompDirective);
|
|
|
|
convertLoopBounds(converter, loc, lowerBound, upperBound, step,
|
|
loopVarTypeSize);
|
|
|
|
if (ReductionProcessor::doReductionByRef(reductionVars))
|
|
byrefOperand = firOpBuilder.getUnitAttr();
|
|
|
|
auto wsLoopOp = firOpBuilder.create<mlir::omp::WsLoopOp>(
|
|
loc, lowerBound, upperBound, step, linearVars, linearStepVars,
|
|
reductionVars,
|
|
reductionDeclSymbols.empty()
|
|
? nullptr
|
|
: mlir::ArrayAttr::get(firOpBuilder.getContext(),
|
|
reductionDeclSymbols),
|
|
scheduleValClauseOperand, scheduleChunkClauseOperand,
|
|
/*schedule_modifiers=*/nullptr,
|
|
/*simd_modifier=*/nullptr, nowaitClauseOperand, byrefOperand,
|
|
orderedClauseOperand, orderClauseOperand,
|
|
/*inclusive=*/firOpBuilder.getUnitAttr());
|
|
|
|
// Handle attribute based clauses.
|
|
if (cp.processOrdered(orderedClauseOperand))
|
|
wsLoopOp.setOrderedValAttr(orderedClauseOperand);
|
|
|
|
if (cp.processSchedule(scheduleValClauseOperand, scheduleModClauseOperand,
|
|
scheduleSimdClauseOperand)) {
|
|
wsLoopOp.setScheduleValAttr(scheduleValClauseOperand);
|
|
wsLoopOp.setScheduleModifierAttr(scheduleModClauseOperand);
|
|
wsLoopOp.setSimdModifierAttr(scheduleSimdClauseOperand);
|
|
}
|
|
// In FORTRAN `nowait` clause occur at the end of `omp do` directive.
|
|
// i.e
|
|
// !$omp do
|
|
// <...>
|
|
// !$omp end do nowait
|
|
if (endClauseList) {
|
|
if (ClauseProcessor(converter, semaCtx, *endClauseList)
|
|
.processNowait(nowaitClauseOperand))
|
|
wsLoopOp.setNowaitAttr(nowaitClauseOperand);
|
|
}
|
|
|
|
auto *nestedEval = getCollapsedLoopEval(
|
|
eval, Fortran::lower::getCollapseValue(beginClauseList));
|
|
|
|
llvm::SmallVector<mlir::Type> reductionTypes;
|
|
reductionTypes.reserve(reductionVars.size());
|
|
llvm::transform(reductionVars, std::back_inserter(reductionTypes),
|
|
[](mlir::Value v) { return v.getType(); });
|
|
|
|
auto ivCallback = [&](mlir::Operation *op) {
|
|
return genLoopAndReductionVars(op, converter, loc, iv, reductionSymbols,
|
|
reductionTypes);
|
|
};
|
|
|
|
createBodyOfOp<mlir::omp::WsLoopOp>(
|
|
wsLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval)
|
|
.setClauses(&beginClauseList)
|
|
.setDataSharingProcessor(&dsp)
|
|
.setReductions(&reductionSymbols, &reductionTypes)
|
|
.setGenRegionEntryCb(ivCallback));
|
|
}
|
|
|
|
static void createSimdWsLoop(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive ompDirective,
|
|
const Fortran::parser::OmpClauseList &beginClauseList,
|
|
const Fortran::parser::OmpClauseList *endClauseList, mlir::Location loc) {
|
|
ClauseProcessor cp(converter, semaCtx, beginClauseList);
|
|
cp.processTODO<
|
|
Fortran::parser::OmpClause::Aligned, Fortran::parser::OmpClause::Allocate,
|
|
Fortran::parser::OmpClause::Linear, Fortran::parser::OmpClause::Safelen,
|
|
Fortran::parser::OmpClause::Simdlen, Fortran::parser::OmpClause::Order>(
|
|
loc, ompDirective);
|
|
// TODO: Add support for vectorization - add vectorization hints inside loop
|
|
// body.
|
|
// OpenMP standard does not specify the length of vector instructions.
|
|
// Currently we safely assume that for !$omp do simd pragma the SIMD length
|
|
// is equal to 1 (i.e. we generate standard workshare loop).
|
|
// When support for vectorization is enabled, then we need to add handling of
|
|
// if clause. Currently if clause can be skipped because we always assume
|
|
// SIMD length = 1.
|
|
createWsLoop(converter, semaCtx, eval, ompDirective, beginClauseList,
|
|
endClauseList, loc);
|
|
}
|
|
|
|
static void genOMP(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPLoopConstruct &loopConstruct) {
|
|
const auto &beginLoopDirective =
|
|
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t);
|
|
const auto &loopOpClauseList =
|
|
std::get<Fortran::parser::OmpClauseList>(beginLoopDirective.t);
|
|
mlir::Location currentLocation =
|
|
converter.genLocation(beginLoopDirective.source);
|
|
const auto ompDirective =
|
|
std::get<Fortran::parser::OmpLoopDirective>(beginLoopDirective.t).v;
|
|
|
|
const auto *endClauseList = [&]() {
|
|
using RetTy = const Fortran::parser::OmpClauseList *;
|
|
if (auto &endLoopDirective =
|
|
std::get<std::optional<Fortran::parser::OmpEndLoopDirective>>(
|
|
loopConstruct.t)) {
|
|
return RetTy(
|
|
&std::get<Fortran::parser::OmpClauseList>((*endLoopDirective).t));
|
|
}
|
|
return RetTy();
|
|
}();
|
|
|
|
bool validDirective = false;
|
|
if (llvm::omp::topTaskloopSet.test(ompDirective)) {
|
|
validDirective = true;
|
|
TODO(currentLocation, "Taskloop construct");
|
|
} else {
|
|
// Create omp.{target, teams, distribute, parallel} nested operations
|
|
if ((llvm::omp::allTargetSet & llvm::omp::loopConstructSet)
|
|
.test(ompDirective)) {
|
|
validDirective = true;
|
|
genTargetOp(converter, semaCtx, eval, /*genNested=*/false,
|
|
currentLocation, loopOpClauseList, ompDirective,
|
|
/*outerCombined=*/true);
|
|
}
|
|
if ((llvm::omp::allTeamsSet & llvm::omp::loopConstructSet)
|
|
.test(ompDirective)) {
|
|
validDirective = true;
|
|
genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation,
|
|
loopOpClauseList,
|
|
/*outerCombined=*/true);
|
|
}
|
|
if (llvm::omp::allDistributeSet.test(ompDirective)) {
|
|
validDirective = true;
|
|
TODO(currentLocation, "Distribute construct");
|
|
}
|
|
if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet)
|
|
.test(ompDirective)) {
|
|
validDirective = true;
|
|
genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false,
|
|
currentLocation, loopOpClauseList,
|
|
/*outerCombined=*/true);
|
|
}
|
|
}
|
|
if ((llvm::omp::allDoSet | llvm::omp::allSimdSet).test(ompDirective))
|
|
validDirective = true;
|
|
|
|
if (!validDirective) {
|
|
TODO(currentLocation, "Unhandled loop directive (" +
|
|
llvm::omp::getOpenMPDirectiveName(ompDirective) +
|
|
")");
|
|
}
|
|
|
|
if (llvm::omp::allDoSimdSet.test(ompDirective)) {
|
|
// 2.9.3.2 Workshare SIMD construct
|
|
createSimdWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList,
|
|
endClauseList, currentLocation);
|
|
|
|
} else if (llvm::omp::allSimdSet.test(ompDirective)) {
|
|
// 2.9.3.1 SIMD construct
|
|
createSimdLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList,
|
|
currentLocation);
|
|
genOpenMPReduction(converter, semaCtx, loopOpClauseList);
|
|
} else {
|
|
createWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList,
|
|
endClauseList, currentLocation);
|
|
}
|
|
}
|
|
|
|
static void
|
|
genOMP(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPBlockConstruct &blockConstruct) {
|
|
const auto &beginBlockDirective =
|
|
std::get<Fortran::parser::OmpBeginBlockDirective>(blockConstruct.t);
|
|
const auto &endBlockDirective =
|
|
std::get<Fortran::parser::OmpEndBlockDirective>(blockConstruct.t);
|
|
const auto &directive =
|
|
std::get<Fortran::parser::OmpBlockDirective>(beginBlockDirective.t);
|
|
const auto &beginClauseList =
|
|
std::get<Fortran::parser::OmpClauseList>(beginBlockDirective.t);
|
|
const auto &endClauseList =
|
|
std::get<Fortran::parser::OmpClauseList>(endBlockDirective.t);
|
|
|
|
for (const Fortran::parser::OmpClause &clause : beginClauseList.v) {
|
|
mlir::Location clauseLocation = converter.genLocation(clause.source);
|
|
if (!std::get_if<Fortran::parser::OmpClause::If>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::NumThreads>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::ProcBind>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Allocate>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Default>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Final>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Priority>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Reduction>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Depend>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Private>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Firstprivate>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Copyin>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Shared>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Threads>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Map>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::UseDevicePtr>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::UseDeviceAddr>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::ThreadLimit>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::NumTeams>(&clause.u)) {
|
|
TODO(clauseLocation, "OpenMP Block construct clause");
|
|
}
|
|
}
|
|
|
|
for (const auto &clause : endClauseList.v) {
|
|
mlir::Location clauseLocation = converter.genLocation(clause.source);
|
|
if (!std::get_if<Fortran::parser::OmpClause::Nowait>(&clause.u) &&
|
|
!std::get_if<Fortran::parser::OmpClause::Copyprivate>(&clause.u))
|
|
TODO(clauseLocation, "OpenMP Block construct clause");
|
|
}
|
|
|
|
bool singleDirective = true;
|
|
mlir::Location currentLocation = converter.genLocation(directive.source);
|
|
switch (directive.v) {
|
|
case llvm::omp::Directive::OMPD_master:
|
|
genMasterOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_ordered:
|
|
genOrderedRegionOp(converter, semaCtx, eval, /*genNested=*/true,
|
|
currentLocation);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_parallel:
|
|
genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
|
|
currentLocation, beginClauseList);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_single:
|
|
genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
|
|
beginClauseList, endClauseList);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_target:
|
|
genTargetOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
|
|
beginClauseList, directive.v);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_target_data:
|
|
genDataOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
|
|
beginClauseList);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_task:
|
|
genTaskOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
|
|
beginClauseList);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_taskgroup:
|
|
genTaskGroupOp(converter, semaCtx, eval, /*genNested=*/true,
|
|
currentLocation, beginClauseList);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_teams:
|
|
genTeamsOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
|
|
beginClauseList,
|
|
/*outerCombined=*/false);
|
|
break;
|
|
case llvm::omp::Directive::OMPD_workshare:
|
|
// FIXME: Workshare is not a commonly used OpenMP construct, an
|
|
// implementation for this feature will come later. For the codes
|
|
// that use this construct, add a single construct for now.
|
|
genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
|
|
beginClauseList, endClauseList);
|
|
break;
|
|
default:
|
|
singleDirective = false;
|
|
break;
|
|
}
|
|
|
|
if (singleDirective)
|
|
return;
|
|
|
|
// Codegen for combined directives
|
|
bool combinedDirective = false;
|
|
if ((llvm::omp::allTargetSet & llvm::omp::blockConstructSet)
|
|
.test(directive.v)) {
|
|
genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation,
|
|
beginClauseList, directive.v,
|
|
/*outerCombined=*/true);
|
|
combinedDirective = true;
|
|
}
|
|
if ((llvm::omp::allTeamsSet & llvm::omp::blockConstructSet)
|
|
.test(directive.v)) {
|
|
genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation,
|
|
beginClauseList);
|
|
combinedDirective = true;
|
|
}
|
|
if ((llvm::omp::allParallelSet & llvm::omp::blockConstructSet)
|
|
.test(directive.v)) {
|
|
bool outerCombined =
|
|
directive.v != llvm::omp::Directive::OMPD_target_parallel;
|
|
genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false,
|
|
currentLocation, beginClauseList, outerCombined);
|
|
combinedDirective = true;
|
|
}
|
|
if ((llvm::omp::workShareSet & llvm::omp::blockConstructSet)
|
|
.test(directive.v)) {
|
|
genSingleOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation,
|
|
beginClauseList, endClauseList);
|
|
combinedDirective = true;
|
|
}
|
|
if (!combinedDirective)
|
|
TODO(currentLocation, "Unhandled block directive (" +
|
|
llvm::omp::getOpenMPDirectiveName(directive.v) +
|
|
")");
|
|
|
|
genNestedEvaluations(converter, eval);
|
|
}
|
|
|
|
static void
|
|
genOMP(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPCriticalConstruct &criticalConstruct) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
mlir::Location currentLocation = converter.getCurrentLocation();
|
|
mlir::IntegerAttr hintClauseOp;
|
|
std::string name;
|
|
const Fortran::parser::OmpCriticalDirective &cd =
|
|
std::get<Fortran::parser::OmpCriticalDirective>(criticalConstruct.t);
|
|
if (std::get<std::optional<Fortran::parser::Name>>(cd.t).has_value()) {
|
|
name =
|
|
std::get<std::optional<Fortran::parser::Name>>(cd.t).value().ToString();
|
|
}
|
|
|
|
const auto &clauseList = std::get<Fortran::parser::OmpClauseList>(cd.t);
|
|
ClauseProcessor(converter, semaCtx, clauseList).processHint(hintClauseOp);
|
|
|
|
mlir::omp::CriticalOp criticalOp = [&]() {
|
|
if (name.empty()) {
|
|
return firOpBuilder.create<mlir::omp::CriticalOp>(
|
|
currentLocation, mlir::FlatSymbolRefAttr());
|
|
}
|
|
mlir::ModuleOp module = firOpBuilder.getModule();
|
|
mlir::OpBuilder modBuilder(module.getBodyRegion());
|
|
auto global = module.lookupSymbol<mlir::omp::CriticalDeclareOp>(name);
|
|
if (!global)
|
|
global = modBuilder.create<mlir::omp::CriticalDeclareOp>(
|
|
currentLocation,
|
|
mlir::StringAttr::get(firOpBuilder.getContext(), name), hintClauseOp);
|
|
return firOpBuilder.create<mlir::omp::CriticalOp>(
|
|
currentLocation, mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(),
|
|
global.getSymName()));
|
|
}();
|
|
auto genInfo = OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval);
|
|
createBodyOfOp<mlir::omp::CriticalOp>(criticalOp, genInfo);
|
|
}
|
|
|
|
static void
|
|
genOMP(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPSectionsConstruct §ionsConstruct) {
|
|
mlir::Location currentLocation = converter.getCurrentLocation();
|
|
llvm::SmallVector<mlir::Value> allocateOperands, allocatorOperands;
|
|
mlir::UnitAttr nowaitClauseOperand;
|
|
const auto &beginSectionsDirective =
|
|
std::get<Fortran::parser::OmpBeginSectionsDirective>(sectionsConstruct.t);
|
|
const auto §ionsClauseList =
|
|
std::get<Fortran::parser::OmpClauseList>(beginSectionsDirective.t);
|
|
|
|
// Process clauses before optional omp.parallel, so that new variables are
|
|
// allocated outside of the parallel region
|
|
ClauseProcessor cp(converter, semaCtx, sectionsClauseList);
|
|
cp.processSectionsReduction(currentLocation);
|
|
cp.processAllocate(allocatorOperands, allocateOperands);
|
|
|
|
llvm::omp::Directive dir =
|
|
std::get<Fortran::parser::OmpSectionsDirective>(beginSectionsDirective.t)
|
|
.v;
|
|
|
|
// Parallel wrapper of PARALLEL SECTIONS construct
|
|
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
|
|
genParallelOp(converter, symTable, semaCtx, eval,
|
|
/*genNested=*/false, currentLocation, sectionsClauseList,
|
|
/*outerCombined=*/true);
|
|
} else {
|
|
const auto &endSectionsDirective =
|
|
std::get<Fortran::parser::OmpEndSectionsDirective>(sectionsConstruct.t);
|
|
const auto &endSectionsClauseList =
|
|
std::get<Fortran::parser::OmpClauseList>(endSectionsDirective.t);
|
|
ClauseProcessor(converter, semaCtx, endSectionsClauseList)
|
|
.processNowait(nowaitClauseOperand);
|
|
}
|
|
|
|
// SECTIONS construct
|
|
genOpWithBody<mlir::omp::SectionsOp>(
|
|
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
|
|
.setGenNested(false),
|
|
/*reduction_vars=*/mlir::ValueRange(),
|
|
/*reductions=*/nullptr, allocateOperands, allocatorOperands,
|
|
nowaitClauseOperand);
|
|
|
|
const auto §ionBlocks =
|
|
std::get<Fortran::parser::OmpSectionBlocks>(sectionsConstruct.t);
|
|
auto &firOpBuilder = converter.getFirOpBuilder();
|
|
auto ip = firOpBuilder.saveInsertionPoint();
|
|
for (const auto &[nblock, neval] :
|
|
llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
|
|
symTable.pushScope();
|
|
genSectionOp(converter, semaCtx, neval, /*genNested=*/true, currentLocation,
|
|
sectionsClauseList);
|
|
symTable.popScope();
|
|
firOpBuilder.restoreInsertionPoint(ip);
|
|
}
|
|
}
|
|
|
|
static void
|
|
genOMP(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPAtomicConstruct &atomicConstruct) {
|
|
std::visit(
|
|
Fortran::common::visitors{
|
|
[&](const Fortran::parser::OmpAtomicRead &atomicRead) {
|
|
mlir::Location loc = converter.genLocation(atomicRead.source);
|
|
Fortran::lower::genOmpAccAtomicRead<
|
|
Fortran::parser::OmpAtomicRead,
|
|
Fortran::parser::OmpAtomicClauseList>(converter, atomicRead,
|
|
loc);
|
|
},
|
|
[&](const Fortran::parser::OmpAtomicWrite &atomicWrite) {
|
|
mlir::Location loc = converter.genLocation(atomicWrite.source);
|
|
Fortran::lower::genOmpAccAtomicWrite<
|
|
Fortran::parser::OmpAtomicWrite,
|
|
Fortran::parser::OmpAtomicClauseList>(converter, atomicWrite,
|
|
loc);
|
|
},
|
|
[&](const Fortran::parser::OmpAtomic &atomicConstruct) {
|
|
mlir::Location loc = converter.genLocation(atomicConstruct.source);
|
|
Fortran::lower::genOmpAtomic<Fortran::parser::OmpAtomic,
|
|
Fortran::parser::OmpAtomicClauseList>(
|
|
converter, atomicConstruct, loc);
|
|
},
|
|
[&](const Fortran::parser::OmpAtomicUpdate &atomicUpdate) {
|
|
mlir::Location loc = converter.genLocation(atomicUpdate.source);
|
|
Fortran::lower::genOmpAccAtomicUpdate<
|
|
Fortran::parser::OmpAtomicUpdate,
|
|
Fortran::parser::OmpAtomicClauseList>(converter, atomicUpdate,
|
|
loc);
|
|
},
|
|
[&](const Fortran::parser::OmpAtomicCapture &atomicCapture) {
|
|
mlir::Location loc = converter.genLocation(atomicCapture.source);
|
|
Fortran::lower::genOmpAccAtomicCapture<
|
|
Fortran::parser::OmpAtomicCapture,
|
|
Fortran::parser::OmpAtomicClauseList>(converter, atomicCapture,
|
|
loc);
|
|
},
|
|
},
|
|
atomicConstruct.u);
|
|
}
|
|
|
|
static void
|
|
markDeclareTarget(mlir::Operation *op,
|
|
Fortran::lower::AbstractConverter &converter,
|
|
mlir::omp::DeclareTargetCaptureClause captureClause,
|
|
mlir::omp::DeclareTargetDeviceType deviceType) {
|
|
// TODO: Add support for program local variables with declare target applied
|
|
auto declareTargetOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(op);
|
|
if (!declareTargetOp)
|
|
fir::emitFatalError(
|
|
converter.getCurrentLocation(),
|
|
"Attempt to apply declare target on unsupported operation");
|
|
|
|
// The function or global already has a declare target applied to it, very
|
|
// likely through implicit capture (usage in another declare target
|
|
// function/subroutine). It should be marked as any if it has been assigned
|
|
// both host and nohost, else we skip, as there is no change
|
|
if (declareTargetOp.isDeclareTarget()) {
|
|
if (declareTargetOp.getDeclareTargetDeviceType() != deviceType)
|
|
declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any,
|
|
captureClause);
|
|
return;
|
|
}
|
|
|
|
declareTargetOp.setDeclareTarget(deviceType, captureClause);
|
|
}
|
|
|
|
static void genOMP(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPDeclareTargetConstruct
|
|
&declareTargetConstruct) {
|
|
llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
|
|
mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
|
|
mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo(
|
|
converter, semaCtx, eval, declareTargetConstruct, symbolAndClause);
|
|
|
|
for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
|
|
mlir::Operation *op = mod.lookupSymbol(converter.mangleName(
|
|
std::get<const Fortran::semantics::Symbol &>(symClause)));
|
|
|
|
// Some symbols are deferred until later in the module, these are handled
|
|
// upon finalization of the module for OpenMP inside of Bridge, so we simply
|
|
// skip for now.
|
|
if (!op)
|
|
continue;
|
|
|
|
markDeclareTarget(
|
|
op, converter,
|
|
std::get<mlir::omp::DeclareTargetCaptureClause>(symClause), deviceType);
|
|
}
|
|
}
|
|
|
|
static void genOMP(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPConstruct &ompConstruct) {
|
|
std::visit(
|
|
Fortran::common::visitors{
|
|
[&](const Fortran::parser::OpenMPStandaloneConstruct
|
|
&standaloneConstruct) {
|
|
genOMP(converter, symTable, semaCtx, eval, standaloneConstruct);
|
|
},
|
|
[&](const Fortran::parser::OpenMPSectionsConstruct
|
|
§ionsConstruct) {
|
|
genOMP(converter, symTable, semaCtx, eval, sectionsConstruct);
|
|
},
|
|
[&](const Fortran::parser::OpenMPSectionConstruct §ionConstruct) {
|
|
// SECTION constructs are handled as a part of SECTIONS.
|
|
llvm_unreachable("Unexpected standalone OMP SECTION");
|
|
},
|
|
[&](const Fortran::parser::OpenMPLoopConstruct &loopConstruct) {
|
|
genOMP(converter, symTable, semaCtx, eval, loopConstruct);
|
|
},
|
|
[&](const Fortran::parser::OpenMPDeclarativeAllocate
|
|
&execAllocConstruct) {
|
|
TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate");
|
|
},
|
|
[&](const Fortran::parser::OpenMPExecutableAllocate
|
|
&execAllocConstruct) {
|
|
TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate");
|
|
},
|
|
[&](const Fortran::parser::OpenMPAllocatorsConstruct
|
|
&allocsConstruct) {
|
|
TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct");
|
|
},
|
|
[&](const Fortran::parser::OpenMPBlockConstruct &blockConstruct) {
|
|
genOMP(converter, symTable, semaCtx, eval, blockConstruct);
|
|
},
|
|
[&](const Fortran::parser::OpenMPAtomicConstruct &atomicConstruct) {
|
|
genOMP(converter, symTable, semaCtx, eval, atomicConstruct);
|
|
},
|
|
[&](const Fortran::parser::OpenMPCriticalConstruct
|
|
&criticalConstruct) {
|
|
genOMP(converter, symTable, semaCtx, eval, criticalConstruct);
|
|
},
|
|
},
|
|
ompConstruct.u);
|
|
}
|
|
|
|
static void
|
|
genOMP(Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPDeclarativeConstruct &ompDeclConstruct) {
|
|
std::visit(
|
|
Fortran::common::visitors{
|
|
[&](const Fortran::parser::OpenMPDeclarativeAllocate
|
|
&declarativeAllocate) {
|
|
TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate");
|
|
},
|
|
[&](const Fortran::parser::OpenMPDeclareReductionConstruct
|
|
&declareReductionConstruct) {
|
|
TODO(converter.getCurrentLocation(),
|
|
"OpenMPDeclareReductionConstruct");
|
|
},
|
|
[&](const Fortran::parser::OpenMPDeclareSimdConstruct
|
|
&declareSimdConstruct) {
|
|
TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct");
|
|
},
|
|
[&](const Fortran::parser::OpenMPDeclareTargetConstruct
|
|
&declareTargetConstruct) {
|
|
genOMP(converter, symTable, semaCtx, eval, declareTargetConstruct);
|
|
},
|
|
[&](const Fortran::parser::OpenMPRequiresConstruct
|
|
&requiresConstruct) {
|
|
// Requires directives are gathered and processed in semantics and
|
|
// then combined in the lowering bridge before triggering codegen
|
|
// just once. Hence, there is no need to lower each individual
|
|
// occurrence here.
|
|
},
|
|
[&](const Fortran::parser::OpenMPThreadprivate &threadprivate) {
|
|
// The directive is lowered when instantiating the variable to
|
|
// support the case of threadprivate variable declared in module.
|
|
},
|
|
},
|
|
ompDeclConstruct.u);
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Public functions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
|
|
mlir::Operation *op,
|
|
mlir::Location loc) {
|
|
if (mlir::isa<mlir::omp::WsLoopOp, mlir::omp::ReductionDeclareOp,
|
|
mlir::omp::AtomicUpdateOp, mlir::omp::SimdLoopOp>(op))
|
|
return builder.create<mlir::omp::YieldOp>(loc);
|
|
else
|
|
return builder.create<mlir::omp::TerminatorOp>(loc);
|
|
}
|
|
|
|
void Fortran::lower::genOpenMPConstruct(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPConstruct &omp) {
|
|
symTable.pushScope();
|
|
genOMP(converter, symTable, semaCtx, eval, omp);
|
|
symTable.popScope();
|
|
}
|
|
|
|
void Fortran::lower::genOpenMPDeclarativeConstruct(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
Fortran::lower::SymMap &symTable,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPDeclarativeConstruct &omp) {
|
|
genOMP(converter, symTable, semaCtx, eval, omp);
|
|
genNestedEvaluations(converter, eval);
|
|
}
|
|
|
|
void Fortran::lower::genOpenMPSymbolProperties(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
const Fortran::lower::pft::Variable &var) {
|
|
assert(var.hasSymbol() && "Expecting Symbol");
|
|
const Fortran::semantics::Symbol &sym = var.getSymbol();
|
|
|
|
if (sym.test(Fortran::semantics::Symbol::Flag::OmpThreadprivate))
|
|
Fortran::lower::genThreadprivateOp(converter, var);
|
|
|
|
if (sym.test(Fortran::semantics::Symbol::Flag::OmpDeclareTarget))
|
|
Fortran::lower::genDeclareTargetIntGlobal(converter, var);
|
|
}
|
|
|
|
int64_t Fortran::lower::getCollapseValue(
|
|
const Fortran::parser::OmpClauseList &clauseList) {
|
|
for (const Fortran::parser::OmpClause &clause : clauseList.v) {
|
|
if (const auto &collapseClause =
|
|
std::get_if<Fortran::parser::OmpClause::Collapse>(&clause.u)) {
|
|
const auto *expr = Fortran::semantics::GetExpr(collapseClause->v);
|
|
return Fortran::evaluate::ToInt64(*expr).value();
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
void Fortran::lower::genThreadprivateOp(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
const Fortran::lower::pft::Variable &var) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
mlir::Location currentLocation = converter.getCurrentLocation();
|
|
|
|
const Fortran::semantics::Symbol &sym = var.getSymbol();
|
|
mlir::Value symThreadprivateValue;
|
|
if (const Fortran::semantics::Symbol *common =
|
|
Fortran::semantics::FindCommonBlockContaining(sym.GetUltimate())) {
|
|
mlir::Value commonValue = converter.getSymbolAddress(*common);
|
|
if (mlir::isa<mlir::omp::ThreadprivateOp>(commonValue.getDefiningOp())) {
|
|
// Generate ThreadprivateOp for a common block instead of its members and
|
|
// only do it once for a common block.
|
|
return;
|
|
}
|
|
// Generate ThreadprivateOp and rebind the common block.
|
|
mlir::Value commonThreadprivateValue =
|
|
firOpBuilder.create<mlir::omp::ThreadprivateOp>(
|
|
currentLocation, commonValue.getType(), commonValue);
|
|
converter.bindSymbol(*common, commonThreadprivateValue);
|
|
// Generate the threadprivate value for the common block member.
|
|
symThreadprivateValue = genCommonBlockMember(converter, currentLocation,
|
|
sym, commonThreadprivateValue);
|
|
} else if (!var.isGlobal()) {
|
|
// Non-global variable which can be in threadprivate directive must be one
|
|
// variable in main program, and it has implicit SAVE attribute. Take it as
|
|
// with SAVE attribute, so to create GlobalOp for it to simplify the
|
|
// translation to LLVM IR.
|
|
// Avoids performing multiple globalInitializations.
|
|
fir::GlobalOp global;
|
|
auto module = converter.getModuleOp();
|
|
std::string globalName = converter.mangleName(sym);
|
|
if (module.lookupSymbol<fir::GlobalOp>(globalName))
|
|
global = module.lookupSymbol<fir::GlobalOp>(globalName);
|
|
else
|
|
global = globalInitialization(converter, firOpBuilder, sym, var,
|
|
currentLocation);
|
|
|
|
mlir::Value symValue = firOpBuilder.create<fir::AddrOfOp>(
|
|
currentLocation, global.resultType(), global.getSymbol());
|
|
symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
|
|
currentLocation, symValue.getType(), symValue);
|
|
} else {
|
|
mlir::Value symValue = converter.getSymbolAddress(sym);
|
|
|
|
// The symbol may be use-associated multiple times, and nothing needs to be
|
|
// done after the original symbol is mapped to the threadprivatized value
|
|
// for the first time. Use the threadprivatized value directly.
|
|
mlir::Operation *op;
|
|
if (auto declOp = symValue.getDefiningOp<hlfir::DeclareOp>())
|
|
op = declOp.getMemref().getDefiningOp();
|
|
else
|
|
op = symValue.getDefiningOp();
|
|
if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
|
|
return;
|
|
|
|
symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
|
|
currentLocation, symValue.getType(), symValue);
|
|
}
|
|
|
|
fir::ExtendedValue sexv = converter.getSymbolExtendedValue(sym);
|
|
fir::ExtendedValue symThreadprivateExv =
|
|
getExtendedValue(sexv, symThreadprivateValue);
|
|
converter.bindSymbol(sym, symThreadprivateExv);
|
|
}
|
|
|
|
// This function replicates threadprivate's behaviour of generating
|
|
// an internal fir.GlobalOp for non-global variables in the main program
|
|
// that have the implicit SAVE attribute, to simplifiy LLVM-IR and MLIR
|
|
// generation.
|
|
void Fortran::lower::genDeclareTargetIntGlobal(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
const Fortran::lower::pft::Variable &var) {
|
|
if (!var.isGlobal()) {
|
|
// A non-global variable which can be in a declare target directive must
|
|
// be a variable in the main program, and it has the implicit SAVE
|
|
// attribute. We create a GlobalOp for it to simplify the translation to
|
|
// LLVM IR.
|
|
globalInitialization(converter, converter.getFirOpBuilder(),
|
|
var.getSymbol(), var, converter.getCurrentLocation());
|
|
}
|
|
}
|
|
|
|
// Generate an OpenMP reduction operation.
|
|
// TODO: Currently assumes it is either an integer addition/multiplication
|
|
// reduction, or a logical and reduction. Generalize this for various reduction
|
|
// operation types.
|
|
// TODO: Generate the reduction operation during lowering instead of creating
|
|
// and removing operations since this is not a robust approach. Also, removing
|
|
// ops in the builder (instead of a rewriter) is probably not the best approach.
|
|
void Fortran::lower::genOpenMPReduction(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
const Fortran::parser::OmpClauseList &clauseList) {
|
|
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
|
|
|
|
for (const Fortran::parser::OmpClause &clause : clauseList.v) {
|
|
if (const auto &reductionClause =
|
|
std::get_if<Fortran::parser::OmpClause::Reduction>(&clause.u)) {
|
|
const auto &redOperator{std::get<Fortran::parser::OmpReductionOperator>(
|
|
reductionClause->v.t)};
|
|
const auto &objectList{
|
|
std::get<Fortran::parser::OmpObjectList>(reductionClause->v.t)};
|
|
if (const auto *reductionOp =
|
|
std::get_if<Fortran::parser::DefinedOperator>(&redOperator.u)) {
|
|
const auto &intrinsicOp{
|
|
std::get<Fortran::parser::DefinedOperator::IntrinsicOperator>(
|
|
reductionOp->u)};
|
|
|
|
switch (intrinsicOp) {
|
|
case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
|
|
case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
|
|
case Fortran::parser::DefinedOperator::IntrinsicOperator::AND:
|
|
case Fortran::parser::DefinedOperator::IntrinsicOperator::EQV:
|
|
case Fortran::parser::DefinedOperator::IntrinsicOperator::OR:
|
|
case Fortran::parser::DefinedOperator::IntrinsicOperator::NEQV:
|
|
break;
|
|
default:
|
|
continue;
|
|
}
|
|
for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
|
|
if (const auto *name{
|
|
Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
|
|
if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
|
|
mlir::Value reductionVal = converter.getSymbolAddress(*symbol);
|
|
if (auto declOp = reductionVal.getDefiningOp<hlfir::DeclareOp>())
|
|
reductionVal = declOp.getBase();
|
|
mlir::Type reductionType =
|
|
reductionVal.getType().cast<fir::ReferenceType>().getEleTy();
|
|
if (!reductionType.isa<fir::LogicalType>()) {
|
|
if (!reductionType.isIntOrIndexOrFloat())
|
|
continue;
|
|
}
|
|
for (mlir::OpOperand &reductionValUse : reductionVal.getUses()) {
|
|
if (auto loadOp = mlir::dyn_cast<fir::LoadOp>(
|
|
reductionValUse.getOwner())) {
|
|
mlir::Value loadVal = loadOp.getRes();
|
|
if (reductionType.isa<fir::LogicalType>()) {
|
|
mlir::Operation *reductionOp = findReductionChain(loadVal);
|
|
fir::ConvertOp convertOp =
|
|
getConvertFromReductionOp(reductionOp, loadVal);
|
|
updateReduction(reductionOp, firOpBuilder, loadVal,
|
|
reductionVal, &convertOp);
|
|
removeStoreOp(reductionOp, reductionVal);
|
|
} else if (mlir::Operation *reductionOp =
|
|
findReductionChain(loadVal, &reductionVal)) {
|
|
updateReduction(reductionOp, firOpBuilder, loadVal,
|
|
reductionVal);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else if (const auto *reductionIntrinsic =
|
|
std::get_if<Fortran::parser::ProcedureDesignator>(
|
|
&redOperator.u)) {
|
|
if (!ReductionProcessor::supportedIntrinsicProcReduction(
|
|
*reductionIntrinsic))
|
|
continue;
|
|
ReductionProcessor::ReductionIdentifier redId =
|
|
ReductionProcessor::getReductionType(*reductionIntrinsic);
|
|
for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
|
|
if (const auto *name{
|
|
Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
|
|
if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
|
|
mlir::Value reductionVal = converter.getSymbolAddress(*symbol);
|
|
if (auto declOp = reductionVal.getDefiningOp<hlfir::DeclareOp>())
|
|
reductionVal = declOp.getBase();
|
|
for (const mlir::OpOperand &reductionValUse :
|
|
reductionVal.getUses()) {
|
|
if (auto loadOp = mlir::dyn_cast<fir::LoadOp>(
|
|
reductionValUse.getOwner())) {
|
|
mlir::Value loadVal = loadOp.getRes();
|
|
// Max is lowered as a compare -> select.
|
|
// Match the pattern here.
|
|
mlir::Operation *reductionOp =
|
|
findReductionChain(loadVal, &reductionVal);
|
|
if (reductionOp == nullptr)
|
|
continue;
|
|
|
|
if (redId == ReductionProcessor::ReductionIdentifier::MAX ||
|
|
redId == ReductionProcessor::ReductionIdentifier::MIN) {
|
|
assert(mlir::isa<mlir::arith::SelectOp>(reductionOp) &&
|
|
"Selection Op not found in reduction intrinsic");
|
|
mlir::Operation *compareOp =
|
|
getCompareFromReductionOp(reductionOp, loadVal);
|
|
updateReduction(compareOp, firOpBuilder, loadVal,
|
|
reductionVal);
|
|
}
|
|
if (redId == ReductionProcessor::ReductionIdentifier::IOR ||
|
|
redId == ReductionProcessor::ReductionIdentifier::IEOR ||
|
|
redId == ReductionProcessor::ReductionIdentifier::IAND) {
|
|
updateReduction(reductionOp, firOpBuilder, loadVal,
|
|
reductionVal);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
mlir::Operation *Fortran::lower::findReductionChain(mlir::Value loadVal,
|
|
mlir::Value *reductionVal) {
|
|
for (mlir::OpOperand &loadOperand : loadVal.getUses()) {
|
|
if (mlir::Operation *reductionOp = loadOperand.getOwner()) {
|
|
if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>(reductionOp)) {
|
|
for (mlir::OpOperand &convertOperand : convertOp.getRes().getUses()) {
|
|
if (mlir::Operation *reductionOp = convertOperand.getOwner())
|
|
return reductionOp;
|
|
}
|
|
}
|
|
for (mlir::OpOperand &reductionOperand : reductionOp->getUses()) {
|
|
if (auto store =
|
|
mlir::dyn_cast<fir::StoreOp>(reductionOperand.getOwner())) {
|
|
if (store.getMemref() == *reductionVal) {
|
|
store.erase();
|
|
return reductionOp;
|
|
}
|
|
}
|
|
if (auto assign =
|
|
mlir::dyn_cast<hlfir::AssignOp>(reductionOperand.getOwner())) {
|
|
if (assign.getLhs() == *reductionVal) {
|
|
assign.erase();
|
|
return reductionOp;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// for a logical operator 'op' reduction X = X op Y
|
|
// This function returns the operation responsible for converting Y from
|
|
// fir.logical<4> to i1
|
|
fir::ConvertOp
|
|
Fortran::lower::getConvertFromReductionOp(mlir::Operation *reductionOp,
|
|
mlir::Value loadVal) {
|
|
for (mlir::Value reductionOperand : reductionOp->getOperands()) {
|
|
if (auto convertOp =
|
|
mlir::dyn_cast<fir::ConvertOp>(reductionOperand.getDefiningOp())) {
|
|
if (convertOp.getOperand() == loadVal)
|
|
continue;
|
|
return convertOp;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
void Fortran::lower::updateReduction(mlir::Operation *op,
|
|
fir::FirOpBuilder &firOpBuilder,
|
|
mlir::Value loadVal,
|
|
mlir::Value reductionVal,
|
|
fir::ConvertOp *convertOp) {
|
|
mlir::OpBuilder::InsertPoint insertPtDel = firOpBuilder.saveInsertionPoint();
|
|
firOpBuilder.setInsertionPoint(op);
|
|
|
|
mlir::Value reductionOp;
|
|
if (convertOp)
|
|
reductionOp = convertOp->getOperand();
|
|
else if (op->getOperand(0) == loadVal)
|
|
reductionOp = op->getOperand(1);
|
|
else
|
|
reductionOp = op->getOperand(0);
|
|
|
|
firOpBuilder.create<mlir::omp::ReductionOp>(op->getLoc(), reductionOp,
|
|
reductionVal);
|
|
firOpBuilder.restoreInsertionPoint(insertPtDel);
|
|
}
|
|
|
|
void Fortran::lower::removeStoreOp(mlir::Operation *reductionOp,
|
|
mlir::Value symVal) {
|
|
for (mlir::Operation *reductionOpUse : reductionOp->getUsers()) {
|
|
if (auto convertReduction =
|
|
mlir::dyn_cast<fir::ConvertOp>(reductionOpUse)) {
|
|
for (mlir::Operation *convertReductionUse :
|
|
convertReduction.getRes().getUsers()) {
|
|
if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(convertReductionUse)) {
|
|
if (storeOp.getMemref() == symVal)
|
|
storeOp.erase();
|
|
}
|
|
if (auto assignOp =
|
|
mlir::dyn_cast<hlfir::AssignOp>(convertReductionUse)) {
|
|
if (assignOp.getLhs() == symVal)
|
|
assignOp.erase();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool Fortran::lower::isOpenMPTargetConstruct(
|
|
const Fortran::parser::OpenMPConstruct &omp) {
|
|
llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown;
|
|
if (const auto *block =
|
|
std::get_if<Fortran::parser::OpenMPBlockConstruct>(&omp.u)) {
|
|
const auto &begin =
|
|
std::get<Fortran::parser::OmpBeginBlockDirective>(block->t);
|
|
dir = std::get<Fortran::parser::OmpBlockDirective>(begin.t).v;
|
|
} else if (const auto *loop =
|
|
std::get_if<Fortran::parser::OpenMPLoopConstruct>(&omp.u)) {
|
|
const auto &begin =
|
|
std::get<Fortran::parser::OmpBeginLoopDirective>(loop->t);
|
|
dir = std::get<Fortran::parser::OmpLoopDirective>(begin.t).v;
|
|
}
|
|
return llvm::omp::allTargetSet.test(dir);
|
|
}
|
|
|
|
void Fortran::lower::gatherOpenMPDeferredDeclareTargets(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPDeclarativeConstruct &ompDecl,
|
|
llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo>
|
|
&deferredDeclareTarget) {
|
|
std::visit(
|
|
Fortran::common::visitors{
|
|
[&](const Fortran::parser::OpenMPDeclareTargetConstruct &ompReq) {
|
|
collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq,
|
|
deferredDeclareTarget);
|
|
},
|
|
[&](const auto &) {},
|
|
},
|
|
ompDecl.u);
|
|
}
|
|
|
|
bool Fortran::lower::isOpenMPDeviceDeclareTarget(
|
|
Fortran::lower::AbstractConverter &converter,
|
|
Fortran::semantics::SemanticsContext &semaCtx,
|
|
Fortran::lower::pft::Evaluation &eval,
|
|
const Fortran::parser::OpenMPDeclarativeConstruct &ompDecl) {
|
|
return std::visit(
|
|
Fortran::common::visitors{
|
|
[&](const Fortran::parser::OpenMPDeclareTargetConstruct &ompReq) {
|
|
mlir::omp::DeclareTargetDeviceType targetType =
|
|
getDeclareTargetFunctionDevice(converter, semaCtx, eval, ompReq)
|
|
.value_or(mlir::omp::DeclareTargetDeviceType::host);
|
|
return targetType != mlir::omp::DeclareTargetDeviceType::host;
|
|
},
|
|
[&](const auto &) { return false; },
|
|
},
|
|
ompDecl.u);
|
|
}
|
|
|
|
// In certain cases such as subroutine or function interfaces which declare
|
|
// but do not define or directly call the subroutine or function in the same
|
|
// module, their lowering is delayed until after the declare target construct
|
|
// itself is processed, so there symbol is not within the table.
|
|
//
|
|
// This function will also return true if we encounter any device declare
|
|
// target cases, to satisfy checking if we require the requires attributes
|
|
// on the module.
|
|
bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions(
|
|
mlir::Operation *mod,
|
|
llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo> &deferredDeclareTargets,
|
|
AbstractConverter &converter) {
|
|
bool deviceCodeFound = false;
|
|
auto modOp = llvm::cast<mlir::ModuleOp>(mod);
|
|
for (auto declTar : deferredDeclareTargets) {
|
|
mlir::Operation *op = modOp.lookupSymbol(converter.mangleName(declTar.sym));
|
|
|
|
// Due to interfaces being optionally emitted on usage in a module,
|
|
// not finding an operation at this point cannot be a hard error, we
|
|
// simply ignore it for now.
|
|
// TODO: Add semantic checks for detecting cases where an erronous
|
|
// (undefined) symbol has been supplied to a declare target clause
|
|
if (!op)
|
|
continue;
|
|
|
|
auto devType = declTar.declareTargetDeviceType;
|
|
if (!deviceCodeFound && devType != mlir::omp::DeclareTargetDeviceType::host)
|
|
deviceCodeFound = true;
|
|
|
|
markDeclareTarget(op, converter, declTar.declareTargetCaptureClause,
|
|
devType);
|
|
}
|
|
|
|
return deviceCodeFound;
|
|
}
|
|
|
|
void Fortran::lower::genOpenMPRequires(
|
|
mlir::Operation *mod, const Fortran::semantics::Symbol *symbol) {
|
|
using MlirRequires = mlir::omp::ClauseRequires;
|
|
using SemaRequires = Fortran::semantics::WithOmpDeclarative::RequiresFlag;
|
|
|
|
if (auto offloadMod =
|
|
llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(mod)) {
|
|
Fortran::semantics::WithOmpDeclarative::RequiresFlags semaFlags;
|
|
if (symbol) {
|
|
Fortran::common::visit(
|
|
[&](const auto &details) {
|
|
if constexpr (std::is_base_of_v<
|
|
Fortran::semantics::WithOmpDeclarative,
|
|
std::decay_t<decltype(details)>>) {
|
|
if (details.has_ompRequires())
|
|
semaFlags = *details.ompRequires();
|
|
}
|
|
},
|
|
symbol->details());
|
|
}
|
|
|
|
MlirRequires mlirFlags = MlirRequires::none;
|
|
if (semaFlags.test(SemaRequires::ReverseOffload))
|
|
mlirFlags = mlirFlags | MlirRequires::reverse_offload;
|
|
if (semaFlags.test(SemaRequires::UnifiedAddress))
|
|
mlirFlags = mlirFlags | MlirRequires::unified_address;
|
|
if (semaFlags.test(SemaRequires::UnifiedSharedMemory))
|
|
mlirFlags = mlirFlags | MlirRequires::unified_shared_memory;
|
|
if (semaFlags.test(SemaRequires::DynamicAllocators))
|
|
mlirFlags = mlirFlags | MlirRequires::dynamic_allocators;
|
|
|
|
offloadMod.setRequires(mlirFlags);
|
|
}
|
|
}
|