//===-- OpenMP.cpp -- Open MP directive lowering --------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ // //===----------------------------------------------------------------------===// #include "flang/Lower/OpenMP.h" #include "ClauseProcessor.h" #include "Clauses.h" #include "DataSharingProcessor.h" #include "DirectivesCommon.h" #include "ReductionProcessor.h" #include "flang/Common/idioms.h" #include "flang/Lower/Bridge.h" #include "flang/Lower/ConvertExpr.h" #include "flang/Lower/ConvertVariable.h" #include "flang/Lower/StatementContext.h" #include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/openmp-directive-sets.h" #include "flang/Semantics/tools.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" using namespace Fortran::lower::omp; //===----------------------------------------------------------------------===// // Code generation helper functions //===----------------------------------------------------------------------===// static Fortran::lower::pft::Evaluation * getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) { // Return the Evaluation of the innermost collapsed loop, or the current one // if there was no COLLAPSE. if (collapseValue == 0) return &eval; Fortran::lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); for (int i = 1; i < collapseValue; i++) { // The nested evaluations should be DoConstructs (i.e. they should form // a loop nest). Each DoConstruct is a tuple . assert(curEval->isA()); curEval = &*std::next(curEval->getNestedEvaluations().begin()); } return curEval; } static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, int collapseValue = 0) { Fortran::lower::pft::Evaluation *curEval = getCollapsedLoopEval(eval, collapseValue); for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) converter.genEval(e); } static fir::GlobalOp globalInitialization( Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym, const Fortran::lower::pft::Variable &var, mlir::Location currentLocation) { mlir::Type ty = converter.genType(sym); std::string globalName = converter.mangleName(sym); mlir::StringAttr linkage = firOpBuilder.createInternalLinkage(); fir::GlobalOp global = firOpBuilder.createGlobal(currentLocation, ty, globalName, linkage); // Create default initialization for non-character scalar. if (Fortran::semantics::IsAllocatableOrObjectPointer(&sym)) { mlir::Type baseAddrType = ty.dyn_cast().getEleTy(); Fortran::lower::createGlobalInitialization( firOpBuilder, global, [&](fir::FirOpBuilder &b) { mlir::Value nullAddr = b.createNullConstant(currentLocation, baseAddrType); mlir::Value box = b.create(currentLocation, ty, nullAddr); b.create(currentLocation, box); }); } else { Fortran::lower::createGlobalInitialization( firOpBuilder, global, [&](fir::FirOpBuilder &b) { mlir::Value undef = b.create(currentLocation, ty); b.create(currentLocation, undef); }); } return global; } static mlir::Operation *getCompareFromReductionOp(mlir::Operation *reductionOp, mlir::Value loadVal) { for (mlir::Value reductionOperand : reductionOp->getOperands()) { if (mlir::Operation *compareOp = reductionOperand.getDefiningOp()) { if (compareOp->getOperand(0) == loadVal || compareOp->getOperand(1) == loadVal) assert((mlir::isa(compareOp) || mlir::isa(compareOp)) && "Expected comparison not found in reduction intrinsic"); return compareOp; } } return nullptr; } // Get the extended value for \p val by extracting additional variable // information from \p base. static fir::ExtendedValue getExtendedValue(fir::ExtendedValue base, mlir::Value val) { return base.match( [&](const fir::MutableBoxValue &box) -> fir::ExtendedValue { return fir::MutableBoxValue(val, box.nonDeferredLenParams(), {}); }, [&](const auto &) -> fir::ExtendedValue { return fir::substBase(base, val); }); } #ifndef NDEBUG static bool isThreadPrivate(Fortran::lower::SymbolRef sym) { if (const auto *details = sym->detailsIf()) { for (const auto &obj : details->objects()) if (!obj->test(Fortran::semantics::Symbol::Flag::OmpThreadprivate)) return false; return true; } return sym->test(Fortran::semantics::Symbol::Flag::OmpThreadprivate); } #endif static void threadPrivatizeVars(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::Location currentLocation = converter.getCurrentLocation(); mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); // If the symbol corresponds to the original ThreadprivateOp, use the symbol // value from that operation to create one ThreadprivateOp copy operation // inside the parallel region. // In some cases, however, the symbol will correspond to the original, // non-threadprivate variable. This can happen, for instance, with a common // block, declared in a separate module, used by a parent procedure and // privatized in its child procedure. auto genThreadprivateOp = [&](Fortran::lower::SymbolRef sym) -> mlir::Value { assert(isThreadPrivate(sym)); mlir::Value symValue = converter.getSymbolAddress(sym); mlir::Operation *op = symValue.getDefiningOp(); if (auto declOp = mlir::dyn_cast(op)) op = declOp.getMemref().getDefiningOp(); if (mlir::isa(op)) symValue = mlir::dyn_cast(op).getSymAddr(); return firOpBuilder.create( currentLocation, symValue.getType(), symValue); }; llvm::SetVector threadprivateSyms; converter.collectSymbolSet(eval, threadprivateSyms, Fortran::semantics::Symbol::Flag::OmpThreadprivate, /*collectSymbols=*/true, /*collectHostAssociatedSymbols=*/true); std::set threadprivateSymNames; // For a COMMON block, the ThreadprivateOp is generated for itself instead of // its members, so only bind the value of the new copied ThreadprivateOp // inside the parallel region to the common block symbol only once for // multiple members in one COMMON block. llvm::SetVector commonSyms; for (std::size_t i = 0; i < threadprivateSyms.size(); i++) { const Fortran::semantics::Symbol *sym = threadprivateSyms[i]; mlir::Value symThreadprivateValue; // The variable may be used more than once, and each reference has one // symbol with the same name. Only do once for references of one variable. if (threadprivateSymNames.find(sym->name()) != threadprivateSymNames.end()) continue; threadprivateSymNames.insert(sym->name()); if (const Fortran::semantics::Symbol *common = Fortran::semantics::FindCommonBlockContaining(sym->GetUltimate())) { mlir::Value commonThreadprivateValue; if (commonSyms.contains(common)) { commonThreadprivateValue = converter.getSymbolAddress(*common); } else { commonThreadprivateValue = genThreadprivateOp(*common); converter.bindSymbol(*common, commonThreadprivateValue); commonSyms.insert(common); } symThreadprivateValue = Fortran::lower::genCommonBlockMember( converter, currentLocation, *sym, commonThreadprivateValue); } else { symThreadprivateValue = genThreadprivateOp(*sym); } fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*sym); fir::ExtendedValue symThreadprivateExv = getExtendedValue(sexv, symThreadprivateValue); converter.bindSymbol(*sym, symThreadprivateExv); } firOpBuilder.restoreInsertionPoint(insPt); } static mlir::Operation * createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, mlir::Location loc, mlir::Value indexVal, const Fortran::semantics::Symbol *sym) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); mlir::Type tempTy = converter.genType(*sym); mlir::Value temp = firOpBuilder.create( loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{}, /*shapeParams*/ mlir::ValueRange{}, llvm::ArrayRef{ fir::getAdaptToByRefAttr(firOpBuilder)}); converter.bindSymbol(*sym, temp); firOpBuilder.restoreInsertionPoint(insPt); mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); mlir::Operation *storeOp = firOpBuilder.create( loc, cvtVal, converter.getSymbolAddress(*sym)); return storeOp; } static mlir::Operation * findReductionChain(mlir::Value loadVal, mlir::Value *reductionVal = nullptr) { for (mlir::OpOperand &loadOperand : loadVal.getUses()) { if (mlir::Operation *reductionOp = loadOperand.getOwner()) { if (auto convertOp = mlir::dyn_cast(reductionOp)) { for (mlir::OpOperand &convertOperand : convertOp.getRes().getUses()) { if (mlir::Operation *reductionOp = convertOperand.getOwner()) return reductionOp; } } for (mlir::OpOperand &reductionOperand : reductionOp->getUses()) { if (auto store = mlir::dyn_cast(reductionOperand.getOwner())) { if (store.getMemref() == *reductionVal) { store.erase(); return reductionOp; } } if (auto assign = mlir::dyn_cast(reductionOperand.getOwner())) { if (assign.getLhs() == *reductionVal) { assign.erase(); return reductionOp; } } } } } return nullptr; } // for a logical operator 'op' reduction X = X op Y // This function returns the operation responsible for converting Y from // fir.logical<4> to i1 static fir::ConvertOp getConvertFromReductionOp(mlir::Operation *reductionOp, mlir::Value loadVal) { for (mlir::Value reductionOperand : reductionOp->getOperands()) { if (auto convertOp = mlir::dyn_cast(reductionOperand.getDefiningOp())) { if (convertOp.getOperand() == loadVal) continue; return convertOp; } } return nullptr; } static void updateReduction(mlir::Operation *op, fir::FirOpBuilder &firOpBuilder, mlir::Value loadVal, mlir::Value reductionVal, fir::ConvertOp *convertOp = nullptr) { mlir::OpBuilder::InsertPoint insertPtDel = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPoint(op); mlir::Value reductionOp; if (convertOp) reductionOp = convertOp->getOperand(); else if (op->getOperand(0) == loadVal) reductionOp = op->getOperand(1); else reductionOp = op->getOperand(0); firOpBuilder.create(op->getLoc(), reductionOp, reductionVal); firOpBuilder.restoreInsertionPoint(insertPtDel); } static void removeStoreOp(mlir::Operation *reductionOp, mlir::Value symVal) { for (mlir::Operation *reductionOpUse : reductionOp->getUsers()) { if (auto convertReduction = mlir::dyn_cast(reductionOpUse)) { for (mlir::Operation *convertReductionUse : convertReduction.getRes().getUsers()) { if (auto storeOp = mlir::dyn_cast(convertReductionUse)) { if (storeOp.getMemref() == symVal) storeOp.erase(); } if (auto assignOp = mlir::dyn_cast(convertReductionUse)) { if (assignOp.getLhs() == symVal) assignOp.erase(); } } } } } // Generate an OpenMP reduction operation. // TODO: Currently assumes it is either an integer addition/multiplication // reduction, or a logical and reduction. Generalize this for various reduction // operation types. // TODO: Generate the reduction operation during lowering instead of creating // and removing operations since this is not a robust approach. Also, removing // ops in the builder (instead of a rewriter) is probably not the best approach. static void genOpenMPReduction(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, const Fortran::parser::OmpClauseList &clauseList) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); List clauses{makeClauses(clauseList, semaCtx)}; for (const Clause &clause : clauses) { if (const auto &reductionClause = std::get_if(&clause.u)) { const auto &redOperatorList{ std::get( reductionClause->t)}; assert(redOperatorList.size() == 1 && "Expecting single operator"); const auto &redOperator = redOperatorList.front(); const auto &objects{std::get(reductionClause->t)}; if (const auto *reductionOp = std::get_if(&redOperator.u)) { const auto &intrinsicOp{ std::get( reductionOp->u)}; switch (intrinsicOp) { case clause::DefinedOperator::IntrinsicOperator::Add: case clause::DefinedOperator::IntrinsicOperator::Multiply: case clause::DefinedOperator::IntrinsicOperator::AND: case clause::DefinedOperator::IntrinsicOperator::EQV: case clause::DefinedOperator::IntrinsicOperator::OR: case clause::DefinedOperator::IntrinsicOperator::NEQV: break; default: continue; } for (const Object &object : objects) { if (const Fortran::semantics::Symbol *symbol = object.id()) { mlir::Value reductionVal = converter.getSymbolAddress(*symbol); if (auto declOp = reductionVal.getDefiningOp()) reductionVal = declOp.getBase(); mlir::Type reductionType = reductionVal.getType().cast().getEleTy(); if (!reductionType.isa()) { if (!reductionType.isIntOrIndexOrFloat()) continue; } for (mlir::OpOperand &reductionValUse : reductionVal.getUses()) { if (auto loadOp = mlir::dyn_cast(reductionValUse.getOwner())) { mlir::Value loadVal = loadOp.getRes(); if (reductionType.isa()) { mlir::Operation *reductionOp = findReductionChain(loadVal); fir::ConvertOp convertOp = getConvertFromReductionOp(reductionOp, loadVal); updateReduction(reductionOp, firOpBuilder, loadVal, reductionVal, &convertOp); removeStoreOp(reductionOp, reductionVal); } else if (mlir::Operation *reductionOp = findReductionChain(loadVal, &reductionVal)) { updateReduction(reductionOp, firOpBuilder, loadVal, reductionVal); } } } } } } else if (const auto *reductionIntrinsic = std::get_if(&redOperator.u)) { if (!ReductionProcessor::supportedIntrinsicProcReduction( *reductionIntrinsic)) continue; ReductionProcessor::ReductionIdentifier redId = ReductionProcessor::getReductionType(*reductionIntrinsic); for (const Object &object : objects) { if (const Fortran::semantics::Symbol *symbol = object.id()) { mlir::Value reductionVal = converter.getSymbolAddress(*symbol); if (auto declOp = reductionVal.getDefiningOp()) reductionVal = declOp.getBase(); for (const mlir::OpOperand &reductionValUse : reductionVal.getUses()) { if (auto loadOp = mlir::dyn_cast(reductionValUse.getOwner())) { mlir::Value loadVal = loadOp.getRes(); // Max is lowered as a compare -> select. // Match the pattern here. mlir::Operation *reductionOp = findReductionChain(loadVal, &reductionVal); if (reductionOp == nullptr) continue; if (redId == ReductionProcessor::ReductionIdentifier::MAX || redId == ReductionProcessor::ReductionIdentifier::MIN) { assert(mlir::isa(reductionOp) && "Selection Op not found in reduction intrinsic"); mlir::Operation *compareOp = getCompareFromReductionOp(reductionOp, loadVal); updateReduction(compareOp, firOpBuilder, loadVal, reductionVal); } if (redId == ReductionProcessor::ReductionIdentifier::IOR || redId == ReductionProcessor::ReductionIdentifier::IEOR || redId == ReductionProcessor::ReductionIdentifier::IAND) { updateReduction(reductionOp, firOpBuilder, loadVal, reductionVal); } } } } } } } } } struct OpWithBodyGenInfo { /// A type for a code-gen callback function. This takes as argument the op for /// which the code is being generated and returns the arguments of the op's /// region. using GenOMPRegionEntryCBFn = std::function( mlir::Operation *)>; OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, mlir::Location loc, Fortran::lower::pft::Evaluation &eval) : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval) {} OpWithBodyGenInfo &setGenNested(bool value) { genNested = value; return *this; } OpWithBodyGenInfo &setOuterCombined(bool value) { outerCombined = value; return *this; } OpWithBodyGenInfo &setClauses(const Fortran::parser::OmpClauseList *value) { clauses = value; return *this; } OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { dsp = value; return *this; } OpWithBodyGenInfo &setReductions( llvm::SmallVectorImpl *value1, llvm::SmallVectorImpl *value2) { reductionSymbols = value1; reductionTypes = value2; return *this; } OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { genRegionEntryCB = value; return *this; } /// [inout] converter to use for the clauses. Fortran::lower::AbstractConverter &converter; /// [in] Semantics context Fortran::semantics::SemanticsContext &semaCtx; /// [in] location in source code. mlir::Location loc; /// [in] current PFT node/evaluation. Fortran::lower::pft::Evaluation &eval; /// [in] whether to generate FIR for nested evaluations bool genNested = true; /// [in] is this an outer operation - prevents privatization. bool outerCombined = false; /// [in] list of clauses to process. const Fortran::parser::OmpClauseList *clauses = nullptr; /// [in] if provided, processes the construct's data-sharing attributes. DataSharingProcessor *dsp = nullptr; /// [in] if provided, list of reduction symbols llvm::SmallVectorImpl *reductionSymbols = nullptr; /// [in] if provided, list of reduction types llvm::SmallVectorImpl *reductionTypes = nullptr; /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block /// is created in the region. GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; }; /// Create the body (block) for an OpenMP Operation. /// /// \param [in] op - the operation the body belongs to. /// \param [in] info - options controlling code-gen for the construction. template static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) { fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); auto insertMarker = [](fir::FirOpBuilder &builder) { mlir::Value undef = builder.create(builder.getUnknownLoc(), builder.getIndexType()); return undef.getDefiningOp(); }; // If an argument for the region is provided then create the block with that // argument. Also update the symbol's address with the mlir argument value. // e.g. For loops the argument is the induction variable. And all further // uses of the induction variable should use this mlir value. auto regionArgs = [&]() -> llvm::SmallVector { if (info.genRegionEntryCB != nullptr) { return info.genRegionEntryCB(op); } firOpBuilder.createBlock(&op.getRegion()); return {}; }(); // Mark the earliest insertion point. mlir::Operation *marker = insertMarker(firOpBuilder); // If it is an unstructured region and is not the outer region of a combined // construct, create empty blocks for all evaluations. if (info.eval.lowerAsUnstructured() && !info.outerCombined) Fortran::lower::createEmptyRegionBlocks( firOpBuilder, info.eval.getNestedEvaluations()); // Start with privatization, so that the lowering of the nested // code will use the right symbols. constexpr bool isLoop = std::is_same_v || std::is_same_v; bool privatize = info.clauses && !info.outerCombined; firOpBuilder.setInsertionPoint(marker); std::optional tempDsp; if (privatize) { if (!info.dsp) { tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval); tempDsp->processStep1(); } } if constexpr (std::is_same_v) { threadPrivatizeVars(info.converter, info.eval); if (info.clauses) { firOpBuilder.setInsertionPoint(marker); ClauseProcessor(info.converter, info.semaCtx, *info.clauses) .processCopyin(); } } if (info.genNested) { // genFIR(Evaluation&) tries to patch up unterminated blocks, causing // a lot of complications for our approach if the terminator generation // is delayed past this point. Insert a temporary terminator here, then // delete it. firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); auto *temp = Fortran::lower::genOpenMPTerminator( firOpBuilder, op.getOperation(), info.loc); firOpBuilder.setInsertionPointAfter(marker); genNestedEvaluations(info.converter, info.eval); temp->erase(); } // Get or create a unique exiting block from the given region, or // return nullptr if there is no exiting block. auto getUniqueExit = [&](mlir::Region ®ion) -> mlir::Block * { // Find the blocks where the OMP terminator should go. In simple cases // it is the single block in the operation's region. When the region // is more complicated, especially with unstructured control flow, there // may be multiple blocks, and some of them may have non-OMP terminators // resulting from lowering of the code contained within the operation. // All the remaining blocks are potential exit points from the op's region. // // Explicit control flow cannot exit any OpenMP region (other than via // STOP), and that is enforced by semantic checks prior to lowering. STOP // statements are lowered to a function call. // Collect unterminated blocks. llvm::SmallVector exits; for (mlir::Block &b : region) { if (b.empty() || !b.back().hasTrait()) exits.push_back(&b); } if (exits.empty()) return nullptr; // If there already is a unique exiting block, do not create another one. // Additionally, some ops (e.g. omp.sections) require only 1 block in // its region. if (exits.size() == 1) return exits[0]; mlir::Block *exit = firOpBuilder.createBlock(®ion); for (mlir::Block *b : exits) { firOpBuilder.setInsertionPointToEnd(b); firOpBuilder.create(info.loc, exit); } return exit; }; if (auto *exitBlock = getUniqueExit(op.getRegion())) { firOpBuilder.setInsertionPointToEnd(exitBlock); auto *term = Fortran::lower::genOpenMPTerminator( firOpBuilder, op.getOperation(), info.loc); // Only insert lastprivate code when there actually is an exit block. // Such a block may not exist if the nested code produced an infinite // loop (this may not make sense in production code, but a user could // write that and we should handle it). firOpBuilder.setInsertionPoint(term); if (privatize) { if (!info.dsp) { assert(tempDsp.has_value()); tempDsp->processStep2(op, isLoop); } else { if (isLoop && regionArgs.size() > 0) info.dsp->setLoopIV(info.converter.getSymbolAddress(*regionArgs[0])); info.dsp->processStep2(op, isLoop); } } } firOpBuilder.setInsertionPointAfter(marker); marker->erase(); } static void genBodyOfTargetDataOp( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::omp::TargetDataOp &dataOp, llvm::ArrayRef useDeviceTypes, llvm::ArrayRef useDeviceLocs, llvm::ArrayRef useDeviceSymbols, const mlir::Location ¤tLocation) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::Region ®ion = dataOp.getRegion(); firOpBuilder.createBlock(®ion, {}, useDeviceTypes, useDeviceLocs); for (auto [argIndex, argSymbol] : llvm::enumerate(useDeviceSymbols)) { const mlir::BlockArgument &arg = region.front().getArgument(argIndex); fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*argSymbol); if (auto refType = arg.getType().dyn_cast()) { if (fir::isa_builtin_cptr_type(refType.getElementType())) { converter.bindSymbol(*argSymbol, arg); } else { // Avoid capture of a reference to a structured binding. const Fortran::semantics::Symbol *sym = argSymbol; extVal.match( [&](const fir::MutableBoxValue &mbv) { converter.bindSymbol( *sym, fir::MutableBoxValue( arg, fir::factory::getNonDeferredLenParams(extVal), {})); }, [&](const auto &) { TODO(converter.getCurrentLocation(), "use_device clause operand unsupported type"); }); } } else { TODO(converter.getCurrentLocation(), "use_device clause operand unsupported type"); } } // Insert dummy instruction to remember the insertion position. The // marker will be deleted by clean up passes since there are no uses. // Remembering the position for further insertion is important since // there are hlfir.declares inserted above while setting block arguments // and new code from the body should be inserted after that. mlir::Value undefMarker = firOpBuilder.create( dataOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); // Create blocks for unstructured regions. This has to be done since // blocks are initially allocated with the function as the parent region. if (eval.lowerAsUnstructured()) { Fortran::lower::createEmptyRegionBlocks( firOpBuilder, eval.getNestedEvaluations()); } firOpBuilder.create(currentLocation); // Set the insertion point after the marker. firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); if (genNested) genNestedEvaluations(converter, eval); } template static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) { auto op = info.converter.getFirOpBuilder().create( info.loc, std::forward(args)...); createBodyOfOp(op, info); return op; } static mlir::omp::MasterOp genMasterOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation) { return genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested), /*resultTypes=*/mlir::TypeRange()); } static mlir::omp::OrderedRegionOp genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation) { return genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested), /*simd=*/false); } static mlir::omp::ParallelOp genParallelOp(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList, bool outerCombined = false) { Fortran::lower::StatementContext stmtCtx; mlir::Value ifClauseOperand, numThreadsClauseOperand; mlir::omp::ClauseProcBindKindAttr procBindKindAttr; llvm::SmallVector allocateOperands, allocatorOperands, reductionVars; llvm::SmallVector reductionTypes; llvm::SmallVector reductionDeclSymbols; llvm::SmallVector reductionSymbols; ClauseProcessor cp(converter, semaCtx, clauseList); cp.processIf(llvm::omp::Directive::OMPD_parallel, ifClauseOperand); cp.processNumThreads(stmtCtx, numThreadsClauseOperand); cp.processProcBind(procBindKindAttr); cp.processDefault(); cp.processAllocate(allocatorOperands, allocateOperands); if (!outerCombined) cp.processReduction(currentLocation, reductionVars, reductionTypes, reductionDeclSymbols, &reductionSymbols); auto reductionCallback = [&](mlir::Operation *op) { llvm::SmallVector locs(reductionVars.size(), currentLocation); auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {}, reductionTypes, locs); for (auto [arg, prv] : llvm::zip_equal(reductionSymbols, block->getArguments())) { converter.bindSymbol(*arg, prv); } return reductionSymbols; }; mlir::UnitAttr byrefAttr; if (ReductionProcessor::doReductionByRef(reductionVars)) byrefAttr = converter.getFirOpBuilder().getUnitAttr(); OpWithBodyGenInfo genInfo = OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setOuterCombined(outerCombined) .setClauses(&clauseList) .setReductions(&reductionSymbols, &reductionTypes) .setGenRegionEntryCb(reductionCallback); if (!enableDelayedPrivatization) { return genOpWithBody( genInfo, /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, numThreadsClauseOperand, allocateOperands, allocatorOperands, reductionVars, reductionDeclSymbols.empty() ? nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), reductionDeclSymbols), procBindKindAttr, /*private_vars=*/llvm::SmallVector{}, /*privatizers=*/nullptr, byrefAttr); } bool privatize = !outerCombined; DataSharingProcessor dsp(converter, semaCtx, clauseList, eval, /*useDelayedPrivatization=*/true, &symTable); if (privatize) dsp.processStep1(); const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); auto genRegionEntryCB = [&](mlir::Operation *op) { auto parallelOp = llvm::cast(op); llvm::SmallVector reductionLocs(reductionVars.size(), currentLocation); mlir::OperandRange privateVars = parallelOp.getPrivateVars(); mlir::Region ®ion = parallelOp.getRegion(); llvm::SmallVector privateVarTypes = reductionTypes; privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); llvm::transform(privateVars, std::back_inserter(privateVarTypes), [](mlir::Value v) { return v.getType(); }); llvm::SmallVector privateVarLocs = reductionLocs; privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); llvm::transform(privateVars, std::back_inserter(privateVarLocs), [](mlir::Value v) { return v.getLoc(); }); converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{}, privateVarTypes, privateVarLocs); llvm::SmallVector allSymbols = reductionSymbols; allSymbols.append(delayedPrivatizationInfo.symbols); for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { converter.bindSymbol(*arg, prv); } return allSymbols; }; // TODO Merge with the reduction CB. genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); llvm::SmallVector privatizers( delayedPrivatizationInfo.privatizers.begin(), delayedPrivatizationInfo.privatizers.end()); return genOpWithBody( genInfo, /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, numThreadsClauseOperand, allocateOperands, allocatorOperands, reductionVars, reductionDeclSymbols.empty() ? nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), reductionDeclSymbols), procBindKindAttr, delayedPrivatizationInfo.originalAddresses, delayedPrivatizationInfo.privatizers.empty() ? nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), privatizers), byrefAttr); } static mlir::omp::SectionOp genSectionOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList §ionsClauseList) { // Currently only private/firstprivate clause is handled, and // all privatization is done within `omp.section` operations. return genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setClauses(§ionsClauseList)); } static mlir::omp::SingleOp genSingleOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &beginClauseList, const Fortran::parser::OmpClauseList &endClauseList) { llvm::SmallVector allocateOperands, allocatorOperands; llvm::SmallVector copyPrivateVars; llvm::SmallVector copyPrivateFuncs; mlir::UnitAttr nowaitAttr; ClauseProcessor cp(converter, semaCtx, beginClauseList); cp.processAllocate(allocatorOperands, allocateOperands); ClauseProcessor ecp(converter, semaCtx, endClauseList); ecp.processNowait(nowaitAttr); ecp.processCopyPrivate(currentLocation, copyPrivateVars, copyPrivateFuncs); return genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setClauses(&beginClauseList), allocateOperands, allocatorOperands, copyPrivateVars, copyPrivateFuncs.empty() ? nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), copyPrivateFuncs), nowaitAttr); } static mlir::omp::TaskOp genTaskOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList) { Fortran::lower::StatementContext stmtCtx; mlir::Value ifClauseOperand, finalClauseOperand, priorityClauseOperand; mlir::UnitAttr untiedAttr, mergeableAttr; llvm::SmallVector dependTypeOperands; llvm::SmallVector allocateOperands, allocatorOperands, dependOperands; ClauseProcessor cp(converter, semaCtx, clauseList); cp.processIf(llvm::omp::Directive::OMPD_task, ifClauseOperand); cp.processAllocate(allocatorOperands, allocateOperands); cp.processDefault(); cp.processFinal(stmtCtx, finalClauseOperand); cp.processUntied(untiedAttr); cp.processMergeable(mergeableAttr); cp.processPriority(stmtCtx, priorityClauseOperand); cp.processDepend(dependTypeOperands, dependOperands); cp.processTODO( currentLocation, llvm::omp::Directive::OMPD_task); return genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setClauses(&clauseList), ifClauseOperand, finalClauseOperand, untiedAttr, mergeableAttr, /*in_reduction_vars=*/mlir::ValueRange(), /*in_reductions=*/nullptr, priorityClauseOperand, dependTypeOperands.empty() ? nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), dependTypeOperands), dependOperands, allocateOperands, allocatorOperands); } static mlir::omp::TaskgroupOp genTaskgroupOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList) { llvm::SmallVector allocateOperands, allocatorOperands; ClauseProcessor cp(converter, semaCtx, clauseList); cp.processAllocate(allocatorOperands, allocateOperands); cp.processTODO(currentLocation, llvm::omp::Directive::OMPD_taskgroup); return genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setClauses(&clauseList), /*task_reduction_vars=*/mlir::ValueRange(), /*task_reductions=*/nullptr, allocateOperands, allocatorOperands); } // This helper function implements the functionality of "promoting" // non-CPTR arguments of use_device_ptr to use_device_addr // arguments (automagic conversion of use_device_ptr -> // use_device_addr in these cases). The way we do so currently is // through the shuffling of operands from the devicePtrOperands to // deviceAddrOperands where neccesary and re-organizing the types, // locations and symbols to maintain the correct ordering of ptr/addr // input -> BlockArg. // // This effectively implements some deprecated OpenMP functionality // that some legacy applications unfortunately depend on // (deprecated in specification version 5.2): // // "If a list item in a use_device_ptr clause is not of type C_PTR, // the behavior is as if the list item appeared in a use_device_addr // clause. Support for such list items in a use_device_ptr clause // is deprecated." static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( llvm::SmallVectorImpl &devicePtrOperands, llvm::SmallVectorImpl &deviceAddrOperands, llvm::SmallVectorImpl &useDeviceTypes, llvm::SmallVectorImpl &useDeviceLocs, llvm::SmallVectorImpl &useDeviceSymbols) { auto moveElementToBack = [](size_t idx, auto &vector) { auto *iter = std::next(vector.begin(), idx); vector.push_back(*iter); vector.erase(iter); }; // Iterate over our use_device_ptr list and shift all non-cptr arguments into // use_device_addr. for (auto *it = devicePtrOperands.begin(); it != devicePtrOperands.end();) { if (!fir::isa_builtin_cptr_type(fir::unwrapRefType(it->getType()))) { deviceAddrOperands.push_back(*it); // We have to shuffle the symbols around as well, to maintain // the correct Input -> BlockArg for use_device_ptr/use_device_addr. // NOTE: However, as map's do not seem to be included currently // this isn't as pertinent, but we must try to maintain for // future alterations. I believe the reason they are not currently // is that the BlockArg assign/lowering needs to be extended // to a greater set of types. auto idx = std::distance(devicePtrOperands.begin(), it); moveElementToBack(idx, useDeviceTypes); moveElementToBack(idx, useDeviceLocs); moveElementToBack(idx, useDeviceSymbols); it = devicePtrOperands.erase(it); continue; } ++it; } } static mlir::omp::TargetDataOp genTargetDataOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList) { Fortran::lower::StatementContext stmtCtx; mlir::Value ifClauseOperand, deviceOperand; llvm::SmallVector mapOperands, devicePtrOperands, deviceAddrOperands; llvm::SmallVector useDeviceTypes; llvm::SmallVector useDeviceLocs; llvm::SmallVector useDeviceSymbols; ClauseProcessor cp(converter, semaCtx, clauseList); cp.processIf(llvm::omp::Directive::OMPD_target_data, ifClauseOperand); cp.processDevice(stmtCtx, deviceOperand); cp.processUseDevicePtr(devicePtrOperands, useDeviceTypes, useDeviceLocs, useDeviceSymbols); cp.processUseDeviceAddr(deviceAddrOperands, useDeviceTypes, useDeviceLocs, useDeviceSymbols); // This function implements the deprecated functionality of use_device_ptr // that allows users to provide non-CPTR arguments to it with the caveat // that the compiler will treat them as use_device_addr. A lot of legacy // code may still depend on this functionality, so we should support it // in some manner. We do so currently by simply shifting non-cptr operands // from the use_device_ptr list into the front of the use_device_addr list // whilst maintaining the ordering of useDeviceLocs, useDeviceSymbols and // useDeviceTypes to use_device_ptr/use_device_addr input for BlockArg // ordering. // TODO: Perhaps create a user provideable compiler option that will // re-introduce a hard-error rather than a warning in these cases. promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( devicePtrOperands, deviceAddrOperands, useDeviceTypes, useDeviceLocs, useDeviceSymbols); cp.processMap(currentLocation, llvm::omp::Directive::OMPD_target_data, stmtCtx, mapOperands); auto dataOp = converter.getFirOpBuilder().create( currentLocation, ifClauseOperand, deviceOperand, devicePtrOperands, deviceAddrOperands, mapOperands); genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, dataOp, useDeviceTypes, useDeviceLocs, useDeviceSymbols, currentLocation); return dataOp; } template static OpTy genTargetEnterExitDataUpdateOp( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; mlir::Value ifClauseOperand, deviceOperand; mlir::UnitAttr nowaitAttr; llvm::SmallVector mapOperands, dependOperands; llvm::SmallVector dependTypeOperands; // GCC 9.3.0 emits a (probably) bogus warning about an unused variable. [[maybe_unused]] llvm::omp::Directive directive; if constexpr (std::is_same_v) { directive = llvm::omp::Directive::OMPD_target_enter_data; } else if constexpr (std::is_same_v) { directive = llvm::omp::Directive::OMPD_target_exit_data; } else if constexpr (std::is_same_v) { directive = llvm::omp::Directive::OMPD_target_update; } else { return nullptr; } ClauseProcessor cp(converter, semaCtx, clauseList); cp.processIf(directive, ifClauseOperand); cp.processDevice(stmtCtx, deviceOperand); cp.processDepend(dependTypeOperands, dependOperands); cp.processNowait(nowaitAttr); if constexpr (std::is_same_v) { cp.processMotionClauses(stmtCtx, mapOperands); cp.processMotionClauses(stmtCtx, mapOperands); } else { cp.processMap(currentLocation, directive, stmtCtx, mapOperands); } return firOpBuilder.create( currentLocation, ifClauseOperand, deviceOperand, dependTypeOperands.empty() ? nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), dependTypeOperands), dependOperands, nowaitAttr, mapOperands); } // This functions creates a block for the body of the targetOp's region. It adds // all the symbols present in mapSymbols as block arguments to this block. static void genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::omp::TargetOp &targetOp, llvm::ArrayRef mapSymTypes, llvm::ArrayRef mapSymLocs, llvm::ArrayRef mapSymbols, const mlir::Location ¤tLocation) { assert(mapSymTypes.size() == mapSymLocs.size()); fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::Region ®ion = targetOp.getRegion(); auto *regionBlock = firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs); // Clones the `bounds` placing them inside the target region and returns them. auto cloneBound = [&](mlir::Value bound) { if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); regionBlock->push_back(clonedOp); return clonedOp->getResult(0); } TODO(converter.getCurrentLocation(), "target map clause operand unsupported bound type"); }; auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { llvm::SmallVector clonedBounds; for (mlir::Value bound : bounds) clonedBounds.emplace_back(cloneBound(bound)); return clonedBounds; }; // Bind the symbols to their corresponding block arguments. for (auto [argIndex, argSymbol] : llvm::enumerate(mapSymbols)) { const mlir::BlockArgument &arg = region.getArgument(argIndex); // Avoid capture of a reference to a structured binding. const Fortran::semantics::Symbol *sym = argSymbol; // Structure component symbols don't have bindings. if (sym->owner().IsDerivedType()) continue; fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym); extVal.match( [&](const fir::BoxValue &v) { converter.bindSymbol(*sym, fir::BoxValue(arg, cloneBounds(v.getLBounds()), v.getExplicitParameters(), v.getExplicitExtents())); }, [&](const fir::MutableBoxValue &v) { converter.bindSymbol( *sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()), v.getMutableProperties())); }, [&](const fir::ArrayBoxValue &v) { converter.bindSymbol( *sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()), cloneBounds(v.getLBounds()), v.getSourceBox())); }, [&](const fir::CharArrayBoxValue &v) { converter.bindSymbol( *sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()), cloneBounds(v.getExtents()), cloneBounds(v.getLBounds()))); }, [&](const fir::CharBoxValue &v) { converter.bindSymbol(*sym, fir::CharBoxValue(arg, cloneBound(v.getLen()))); }, [&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); }, [&](const auto &) { TODO(converter.getCurrentLocation(), "target map clause operand unsupported type"); }); } // Check if cloning the bounds introduced any dependency on the outer region. // If so, then either clone them as well if they are MemoryEffectFree, or else // copy them to a new temporary and add them to the map and block_argument // lists and replace their uses with the new temporary. llvm::SetVector valuesDefinedAbove; mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); while (!valuesDefinedAbove.empty()) { for (mlir::Value val : valuesDefinedAbove) { mlir::Operation *valOp = val.getDefiningOp(); if (mlir::isMemoryEffectFree(valOp)) { mlir::Operation *clonedOp = valOp->clone(); regionBlock->push_front(clonedOp); val.replaceUsesWithIf( clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) { return use.getOwner()->getBlock() == regionBlock; }); } else { auto savedIP = firOpBuilder.getInsertionPoint(); firOpBuilder.setInsertionPointAfter(valOp); auto copyVal = firOpBuilder.createTemporary(val.getLoc(), val.getType()); firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); llvm::SmallVector bounds; std::stringstream name; firOpBuilder.setInsertionPoint(targetOp); mlir::Value mapOp = createMapInfoOp( firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), bounds, llvm::SmallVector{}, static_cast< std::underlying_type_t>( llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); targetOp.getMapOperandsMutable().append(mapOp); mlir::Value clonedValArg = region.addArgument(copyVal.getType(), copyVal.getLoc()); firOpBuilder.setInsertionPointToStart(regionBlock); auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), clonedValArg); val.replaceUsesWithIf( loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { return use.getOwner()->getBlock() == regionBlock; }); firOpBuilder.setInsertionPoint(regionBlock, savedIP); } } valuesDefinedAbove.clear(); mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); } // Insert dummy instruction to remember the insertion position. The // marker will be deleted since there are not uses. // In the HLFIR flow there are hlfir.declares inserted above while // setting block arguments. mlir::Value undefMarker = firOpBuilder.create( targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); // Create blocks for unstructured regions. This has to be done since // blocks are initially allocated with the function as the parent region. if (eval.lowerAsUnstructured()) { Fortran::lower::createEmptyRegionBlocks( firOpBuilder, eval.getNestedEvaluations()); } firOpBuilder.create(currentLocation); // Create the insertion point after the marker. firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); if (genNested) genNestedEvaluations(converter, eval); } static mlir::omp::TargetOp genTargetOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList, llvm::omp::Directive directive, bool outerCombined = false) { Fortran::lower::StatementContext stmtCtx; mlir::Value ifClauseOperand, deviceOperand, threadLimitOperand; mlir::UnitAttr nowaitAttr; llvm::SmallVector dependTypeOperands; llvm::SmallVector mapOperands, dependOperands; llvm::SmallVector mapSymTypes; llvm::SmallVector mapSymLocs; llvm::SmallVector mapSymbols; ClauseProcessor cp(converter, semaCtx, clauseList); cp.processIf(llvm::omp::Directive::OMPD_target, ifClauseOperand); cp.processDevice(stmtCtx, deviceOperand); cp.processThreadLimit(stmtCtx, threadLimitOperand); cp.processDepend(dependTypeOperands, dependOperands); cp.processNowait(nowaitAttr); cp.processMap(currentLocation, directive, stmtCtx, mapOperands, &mapSymTypes, &mapSymLocs, &mapSymbols); cp.processTODO( currentLocation, llvm::omp::Directive::OMPD_target); // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the // symbols used inside the region that have not been explicitly mapped using // the map clause. auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) { if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) { mlir::Value baseOp = converter.getSymbolAddress(sym); if (!baseOp) if (const auto *details = sym.template detailsIf< Fortran::semantics::HostAssocDetails>()) { baseOp = converter.getSymbolAddress(details->symbol()); converter.copySymbolBinding(details->symbol(), sym); } if (baseOp) { llvm::SmallVector bounds; std::stringstream name; fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); name << sym.name().ToString(); Fortran::lower::AddrAndBoundsInfo info = getDataOperandBaseAddr(converter, converter.getFirOpBuilder(), sym, converter.getCurrentLocation()); if (fir::unwrapRefType(info.addr.getType()).isa()) bounds = Fortran::lower::genBoundsOpsFromBox( converter.getFirOpBuilder(), converter.getCurrentLocation(), converter, dataExv, info); if (fir::unwrapRefType(info.addr.getType()).isa()) { bool dataExvIsAssumedSize = Fortran::semantics::IsAssumedSizeArray(sym.GetUltimate()); bounds = Fortran::lower::genBaseBoundsOps( converter.getFirOpBuilder(), converter.getCurrentLocation(), converter, dataExv, dataExvIsAssumedSize); } llvm::omp::OpenMPOffloadMappingFlags mapFlag = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; mlir::omp::VariableCaptureKind captureKind = mlir::omp::VariableCaptureKind::ByRef; mlir::Type eleType = baseOp.getType(); if (auto refType = baseOp.getType().dyn_cast()) eleType = refType.getElementType(); // If a variable is specified in declare target link and if device // type is not specified as `nohost`, it needs to be mapped tofrom mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym)); auto declareTargetOp = llvm::dyn_cast_if_present(op); if (declareTargetOp && declareTargetOp.isDeclareTarget()) { if (declareTargetOp.getDeclareTargetCaptureClause() == mlir::omp::DeclareTargetCaptureClause::link && declareTargetOp.getDeclareTargetDeviceType() != mlir::omp::DeclareTargetDeviceType::nohost) { mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; } } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { captureKind = mlir::omp::VariableCaptureKind::ByCopy; } else if (!fir::isa_builtin_cptr_type(eleType)) { mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; } mlir::Value mapOp = createMapInfoOp( converter.getFirOpBuilder(), baseOp.getLoc(), baseOp, mlir::Value{}, name.str(), bounds, {}, static_cast< std::underlying_type_t>( mapFlag), captureKind, baseOp.getType()); mapOperands.push_back(mapOp); mapSymTypes.push_back(baseOp.getType()); mapSymLocs.push_back(baseOp.getLoc()); mapSymbols.push_back(&sym); } } }; Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap); auto targetOp = converter.getFirOpBuilder().create( currentLocation, ifClauseOperand, deviceOperand, threadLimitOperand, dependTypeOperands.empty() ? nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), dependTypeOperands), dependOperands, nowaitAttr, mapOperands); genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSymTypes, mapSymLocs, mapSymbols, currentLocation); return targetOp; } static mlir::omp::TeamsOp genTeamsOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList, bool outerCombined = false) { Fortran::lower::StatementContext stmtCtx; mlir::Value numTeamsClauseOperand, ifClauseOperand, threadLimitClauseOperand; llvm::SmallVector allocateOperands, allocatorOperands, reductionVars; llvm::SmallVector reductionDeclSymbols; ClauseProcessor cp(converter, semaCtx, clauseList); cp.processIf(llvm::omp::Directive::OMPD_teams, ifClauseOperand); cp.processAllocate(allocatorOperands, allocateOperands); cp.processDefault(); cp.processNumTeams(stmtCtx, numTeamsClauseOperand); cp.processThreadLimit(stmtCtx, threadLimitClauseOperand); cp.processTODO(currentLocation, llvm::omp::Directive::OMPD_teams); return genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setOuterCombined(outerCombined) .setClauses(&clauseList), /*num_teams_lower=*/nullptr, numTeamsClauseOperand, ifClauseOperand, threadLimitClauseOperand, allocateOperands, allocatorOperands, reductionVars, reductionDeclSymbols.empty() ? nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), reductionDeclSymbols)); } /// Extract the list of function and variable symbols affected by the given /// 'declare target' directive and return the intended device type for them. static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, llvm::SmallVectorImpl &symbolAndClause) { // The default capture type mlir::omp::DeclareTargetDeviceType deviceType = mlir::omp::DeclareTargetDeviceType::any; const auto &spec = std::get( declareTargetConstruct.t); if (const auto *objectList{ Fortran::parser::Unwrap(spec.u)}) { ObjectList objects{makeObjects(*objectList, semaCtx)}; // Case: declare target(func, var1, var2) gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to, symbolAndClause); } else if (const auto *clauseList{ Fortran::parser::Unwrap( spec.u)}) { if (clauseList->v.empty()) { // Case: declare target, implicit capture of function symbolAndClause.emplace_back( mlir::omp::DeclareTargetCaptureClause::to, eval.getOwningProcedure()->getSubprogramSymbol()); } ClauseProcessor cp(converter, semaCtx, *clauseList); cp.processTo(symbolAndClause); cp.processEnter(symbolAndClause); cp.processLink(symbolAndClause); cp.processDeviceType(deviceType); cp.processTODO(converter.getCurrentLocation(), llvm::omp::Directive::OMPD_declare_target); } return deviceType; } static void collectDeferredDeclareTargets( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, llvm::SmallVectorImpl &deferredDeclareTarget) { llvm::SmallVector symbolAndClause; mlir::omp::DeclareTargetDeviceType devType = getDeclareTargetInfo( converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); // Return the device type only if at least one of the targets for the // directive is a function or subroutine mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); for (const DeclareTargetCapturePair &symClause : symbolAndClause) { mlir::Operation *op = mod.lookupSymbol(converter.mangleName( std::get(symClause))); if (!op) { deferredDeclareTarget.push_back( {std::get<0>(symClause), devType, std::get<1>(symClause)}); } } } static std::optional getDeclareTargetFunctionDevice( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) { llvm::SmallVector symbolAndClause; mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); // Return the device type only if at least one of the targets for the // directive is a function or subroutine mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); for (const DeclareTargetCapturePair &symClause : symbolAndClause) { mlir::Operation *op = mod.lookupSymbol(converter.mangleName( std::get(symClause))); if (mlir::isa_and_nonnull(op)) return deviceType; } return std::nullopt; } //===----------------------------------------------------------------------===// // genOMP() Code generation helper functions //===----------------------------------------------------------------------===// static void genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, const Fortran::parser::OpenMPSimpleStandaloneConstruct &simpleStandaloneConstruct) { const auto &directive = std::get( simpleStandaloneConstruct.t); fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto &opClauseList = std::get(simpleStandaloneConstruct.t); mlir::Location currentLocation = converter.genLocation(directive.source); switch (directive.v) { default: break; case llvm::omp::Directive::OMPD_barrier: firOpBuilder.create(currentLocation); break; case llvm::omp::Directive::OMPD_taskwait: ClauseProcessor(converter, semaCtx, opClauseList) .processTODO( currentLocation, llvm::omp::Directive::OMPD_taskwait); firOpBuilder.create(currentLocation); break; case llvm::omp::Directive::OMPD_taskyield: firOpBuilder.create(currentLocation); break; case llvm::omp::Directive::OMPD_target_data: genTargetDataOp(converter, semaCtx, eval, genNested, currentLocation, opClauseList); break; case llvm::omp::Directive::OMPD_target_enter_data: genTargetEnterExitDataUpdateOp( converter, semaCtx, currentLocation, opClauseList); break; case llvm::omp::Directive::OMPD_target_exit_data: genTargetEnterExitDataUpdateOp( converter, semaCtx, currentLocation, opClauseList); break; case llvm::omp::Directive::OMPD_target_update: genTargetEnterExitDataUpdateOp( converter, semaCtx, currentLocation, opClauseList); break; case llvm::omp::Directive::OMPD_ordered: TODO(currentLocation, "OMPD_ordered"); } } static void genOmpFlush(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { llvm::SmallVector operandRange; if (const auto &ompObjectList = std::get>( flushConstruct.t)) genObjectList2(*ompObjectList, converter, operandRange); const auto &memOrderClause = std::get>>( flushConstruct.t); if (memOrderClause && memOrderClause->size() > 0) TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause"); converter.getFirOpBuilder().create( converter.getCurrentLocation(), operandRange); } static llvm::SmallVector genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, mlir::Location &loc, llvm::ArrayRef args) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); auto ®ion = op->getRegion(0); std::size_t loopVarTypeSize = 0; for (const Fortran::semantics::Symbol *arg : args) loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); llvm::SmallVector tiv(args.size(), loopVarType); llvm::SmallVector locs(args.size(), loc); firOpBuilder.createBlock(®ion, {}, tiv, locs); // The argument is not currently in memory, so make a temporary for the // argument, and store it there, then bind that location to the argument. mlir::Operation *storeOp = nullptr; for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } firOpBuilder.setInsertionPointAfter(storeOp); return llvm::SmallVector(args); } static llvm::SmallVector genLoopAndReductionVars( mlir::Operation *op, Fortran::lower::AbstractConverter &converter, mlir::Location &loc, llvm::ArrayRef loopArgs, llvm::ArrayRef reductionArgs, llvm::ArrayRef reductionTypes) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); llvm::SmallVector blockArgTypes; llvm::SmallVector blockArgLocs; blockArgTypes.reserve(loopArgs.size() + reductionArgs.size()); blockArgLocs.reserve(blockArgTypes.size()); mlir::Block *entryBlock; if (loopArgs.size()) { std::size_t loopVarTypeSize = 0; for (const Fortran::semantics::Symbol *arg : loopArgs) loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); std::fill_n(std::back_inserter(blockArgTypes), loopArgs.size(), loopVarType); std::fill_n(std::back_inserter(blockArgLocs), loopArgs.size(), loc); } if (reductionArgs.size()) { llvm::copy(reductionTypes, std::back_inserter(blockArgTypes)); std::fill_n(std::back_inserter(blockArgLocs), reductionArgs.size(), loc); } entryBlock = firOpBuilder.createBlock(&op->getRegion(0), {}, blockArgTypes, blockArgLocs); // The argument is not currently in memory, so make a temporary for the // argument, and store it there, then bind that location to the argument. if (loopArgs.size()) { mlir::Operation *storeOp = nullptr; for (auto [argIndex, argSymbol] : llvm::enumerate(loopArgs)) { mlir::Value indexVal = fir::getBase(op->getRegion(0).front().getArgument(argIndex)); storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } firOpBuilder.setInsertionPointAfter(storeOp); } // Bind the reduction arguments to their block arguments for (auto [arg, prv] : llvm::zip_equal( reductionArgs, llvm::drop_begin(entryBlock->getArguments(), loopArgs.size()))) { converter.bindSymbol(*arg, prv); } return llvm::SmallVector(loopArgs); } static void createSimdLoop(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive ompDirective, const Fortran::parser::OmpClauseList &loopOpClauseList, mlir::Location loc) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); DataSharingProcessor dsp(converter, semaCtx, loopOpClauseList, eval); dsp.processStep1(); Fortran::lower::StatementContext stmtCtx; mlir::Value scheduleChunkClauseOperand, ifClauseOperand; llvm::SmallVector lowerBound, upperBound, step, reductionVars; llvm::SmallVector alignedVars, nontemporalVars; llvm::SmallVector iv; llvm::SmallVector reductionTypes; llvm::SmallVector reductionDeclSymbols; mlir::omp::ClauseOrderKindAttr orderClauseOperand; mlir::IntegerAttr simdlenClauseOperand, safelenClauseOperand; ClauseProcessor cp(converter, semaCtx, loopOpClauseList); cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv); cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); cp.processReduction(loc, reductionVars, reductionTypes, reductionDeclSymbols); cp.processIf(llvm::omp::Directive::OMPD_simd, ifClauseOperand); cp.processSimdlen(simdlenClauseOperand); cp.processSafelen(safelenClauseOperand); cp.processTODO(loc, ompDirective); mlir::TypeRange resultType; auto simdLoopOp = firOpBuilder.create( loc, resultType, lowerBound, upperBound, step, alignedVars, /*alignment_values=*/nullptr, ifClauseOperand, nontemporalVars, orderClauseOperand, simdlenClauseOperand, safelenClauseOperand, /*inclusive=*/firOpBuilder.getUnitAttr()); auto *nestedEval = getCollapsedLoopEval( eval, Fortran::lower::getCollapseValue(loopOpClauseList)); auto ivCallback = [&](mlir::Operation *op) { return genLoopVars(op, converter, loc, iv); }; createBodyOfOp( simdLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) .setClauses(&loopOpClauseList) .setDataSharingProcessor(&dsp) .setGenRegionEntryCb(ivCallback)); } static void createWsloop(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive ompDirective, const Fortran::parser::OmpClauseList &beginClauseList, const Fortran::parser::OmpClauseList *endClauseList, mlir::Location loc) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); DataSharingProcessor dsp(converter, semaCtx, beginClauseList, eval); dsp.processStep1(); Fortran::lower::StatementContext stmtCtx; mlir::Value scheduleChunkClauseOperand; llvm::SmallVector lowerBound, upperBound, step, reductionVars; llvm::SmallVector linearVars, linearStepVars; llvm::SmallVector iv; llvm::SmallVector reductionTypes; llvm::SmallVector reductionDeclSymbols; llvm::SmallVector reductionSymbols; mlir::omp::ClauseOrderKindAttr orderClauseOperand; mlir::omp::ClauseScheduleKindAttr scheduleValClauseOperand; mlir::UnitAttr nowaitClauseOperand, byrefOperand, scheduleSimdClauseOperand; mlir::IntegerAttr orderedClauseOperand; mlir::omp::ScheduleModifierAttr scheduleModClauseOperand; ClauseProcessor cp(converter, semaCtx, beginClauseList); cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv); cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); cp.processReduction(loc, reductionVars, reductionTypes, reductionDeclSymbols, &reductionSymbols); cp.processTODO(loc, ompDirective); if (ReductionProcessor::doReductionByRef(reductionVars)) byrefOperand = firOpBuilder.getUnitAttr(); auto wsLoopOp = firOpBuilder.create( loc, lowerBound, upperBound, step, linearVars, linearStepVars, reductionVars, reductionDeclSymbols.empty() ? nullptr : mlir::ArrayAttr::get(firOpBuilder.getContext(), reductionDeclSymbols), scheduleValClauseOperand, scheduleChunkClauseOperand, /*schedule_modifiers=*/nullptr, /*simd_modifier=*/nullptr, nowaitClauseOperand, byrefOperand, orderedClauseOperand, orderClauseOperand, /*inclusive=*/firOpBuilder.getUnitAttr()); // Handle attribute based clauses. if (cp.processOrdered(orderedClauseOperand)) wsLoopOp.setOrderedValAttr(orderedClauseOperand); if (cp.processSchedule(scheduleValClauseOperand, scheduleModClauseOperand, scheduleSimdClauseOperand)) { wsLoopOp.setScheduleValAttr(scheduleValClauseOperand); wsLoopOp.setScheduleModifierAttr(scheduleModClauseOperand); wsLoopOp.setSimdModifierAttr(scheduleSimdClauseOperand); } // In FORTRAN `nowait` clause occur at the end of `omp do` directive. // i.e // !$omp do // <...> // !$omp end do nowait if (endClauseList) { if (ClauseProcessor(converter, semaCtx, *endClauseList) .processNowait(nowaitClauseOperand)) wsLoopOp.setNowaitAttr(nowaitClauseOperand); } auto *nestedEval = getCollapsedLoopEval( eval, Fortran::lower::getCollapseValue(beginClauseList)); auto ivCallback = [&](mlir::Operation *op) { return genLoopAndReductionVars(op, converter, loc, iv, reductionSymbols, reductionTypes); }; createBodyOfOp( wsLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) .setClauses(&beginClauseList) .setDataSharingProcessor(&dsp) .setReductions(&reductionSymbols, &reductionTypes) .setGenRegionEntryCb(ivCallback)); } static void createSimdWsloop( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive ompDirective, const Fortran::parser::OmpClauseList &beginClauseList, const Fortran::parser::OmpClauseList *endClauseList, mlir::Location loc) { ClauseProcessor cp(converter, semaCtx, beginClauseList); cp.processTODO(loc, ompDirective); // TODO: Add support for vectorization - add vectorization hints inside loop // body. // OpenMP standard does not specify the length of vector instructions. // Currently we safely assume that for !$omp do simd pragma the SIMD length // is equal to 1 (i.e. we generate standard workshare loop). // When support for vectorization is enabled, then we need to add handling of // if clause. Currently if clause can be skipped because we always assume // SIMD length = 1. createWsloop(converter, semaCtx, eval, ompDirective, beginClauseList, endClauseList, loc); } static void markDeclareTarget(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, mlir::omp::DeclareTargetCaptureClause captureClause, mlir::omp::DeclareTargetDeviceType deviceType) { // TODO: Add support for program local variables with declare target applied auto declareTargetOp = llvm::dyn_cast(op); if (!declareTargetOp) fir::emitFatalError( converter.getCurrentLocation(), "Attempt to apply declare target on unsupported operation"); // The function or global already has a declare target applied to it, very // likely through implicit capture (usage in another declare target // function/subroutine). It should be marked as any if it has been assigned // both host and nohost, else we skip, as there is no change if (declareTargetOp.isDeclareTarget()) { if (declareTargetOp.getDeclareTargetDeviceType() != deviceType) declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any, captureClause); return; } declareTargetOp.setDeclareTarget(deviceType, captureClause); } //===----------------------------------------------------------------------===// // OpenMPDeclarativeConstruct visitors //===----------------------------------------------------------------------===// static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclarativeAllocate &declarativeAllocate) { TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate"); } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclareReductionConstruct &declareReductionConstruct) { TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct"); } static void genOMP( Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclareSimdConstruct &declareSimdConstruct) { TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct"); } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) { llvm::SmallVector symbolAndClause; mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); for (const DeclareTargetCapturePair &symClause : symbolAndClause) { mlir::Operation *op = mod.lookupSymbol(converter.mangleName( std::get(symClause))); // Some symbols are deferred until later in the module, these are handled // upon finalization of the module for OpenMP inside of Bridge, so we simply // skip for now. if (!op) continue; markDeclareTarget( op, converter, std::get(symClause), deviceType); } } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPRequiresConstruct &requiresConstruct) { // Requires directives are gathered and processed in semantics and // then combined in the lowering bridge before triggering codegen // just once. Hence, there is no need to lower each individual // occurrence here. } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPThreadprivate &threadprivate) { // The directive is lowered when instantiating the variable to // support the case of threadprivate variable declared in module. } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclarativeConstruct &ompDeclConstruct) { std::visit( [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, ompDeclConstruct.u); } //===----------------------------------------------------------------------===// // OpenMPConstruct visitors //===----------------------------------------------------------------------===// static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPAllocatorsConstruct &allocsConstruct) { TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct"); } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPAtomicConstruct &atomicConstruct) { std::visit( Fortran::common::visitors{ [&](const Fortran::parser::OmpAtomicRead &atomicRead) { mlir::Location loc = converter.genLocation(atomicRead.source); Fortran::lower::genOmpAccAtomicRead< Fortran::parser::OmpAtomicRead, Fortran::parser::OmpAtomicClauseList>(converter, atomicRead, loc); }, [&](const Fortran::parser::OmpAtomicWrite &atomicWrite) { mlir::Location loc = converter.genLocation(atomicWrite.source); Fortran::lower::genOmpAccAtomicWrite< Fortran::parser::OmpAtomicWrite, Fortran::parser::OmpAtomicClauseList>(converter, atomicWrite, loc); }, [&](const Fortran::parser::OmpAtomic &atomicConstruct) { mlir::Location loc = converter.genLocation(atomicConstruct.source); Fortran::lower::genOmpAtomic( converter, atomicConstruct, loc); }, [&](const Fortran::parser::OmpAtomicUpdate &atomicUpdate) { mlir::Location loc = converter.genLocation(atomicUpdate.source); Fortran::lower::genOmpAccAtomicUpdate< Fortran::parser::OmpAtomicUpdate, Fortran::parser::OmpAtomicClauseList>(converter, atomicUpdate, loc); }, [&](const Fortran::parser::OmpAtomicCapture &atomicCapture) { mlir::Location loc = converter.genLocation(atomicCapture.source); Fortran::lower::genOmpAccAtomicCapture< Fortran::parser::OmpAtomicCapture, Fortran::parser::OmpAtomicClauseList>(converter, atomicCapture, loc); }, }, atomicConstruct.u); } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPBlockConstruct &blockConstruct) { const auto &beginBlockDirective = std::get(blockConstruct.t); const auto &endBlockDirective = std::get(blockConstruct.t); const auto &directive = std::get(beginBlockDirective.t); const auto &beginClauseList = std::get(beginBlockDirective.t); const auto &endClauseList = std::get(endBlockDirective.t); for (const Fortran::parser::OmpClause &clause : beginClauseList.v) { mlir::Location clauseLocation = converter.genLocation(clause.source); if (!std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u)) { TODO(clauseLocation, "OpenMP Block construct clause"); } } for (const auto &clause : endClauseList.v) { mlir::Location clauseLocation = converter.genLocation(clause.source); if (!std::get_if(&clause.u) && !std::get_if(&clause.u)) TODO(clauseLocation, "OpenMP Block construct clause"); } bool singleDirective = true; mlir::Location currentLocation = converter.genLocation(directive.source); switch (directive.v) { case llvm::omp::Directive::OMPD_master: genMasterOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation); break; case llvm::omp::Directive::OMPD_ordered: genOrderedRegionOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation); break; case llvm::omp::Directive::OMPD_parallel: genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true, currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, beginClauseList, endClauseList); break; case llvm::omp::Directive::OMPD_target: genTargetOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, beginClauseList, directive.v); break; case llvm::omp::Directive::OMPD_target_data: genTargetDataOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_task: genTaskOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_taskgroup: genTaskgroupOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_teams: genTeamsOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, beginClauseList, /*outerCombined=*/false); break; case llvm::omp::Directive::OMPD_workshare: // FIXME: Workshare is not a commonly used OpenMP construct, an // implementation for this feature will come later. For the codes // that use this construct, add a single construct for now. genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, beginClauseList, endClauseList); break; default: singleDirective = false; break; } if (singleDirective) return; // Codegen for combined directives bool combinedDirective = false; if ((llvm::omp::allTargetSet & llvm::omp::blockConstructSet) .test(directive.v)) { genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList, directive.v, /*outerCombined=*/true); combinedDirective = true; } if ((llvm::omp::allTeamsSet & llvm::omp::blockConstructSet) .test(directive.v)) { genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList); combinedDirective = true; } if ((llvm::omp::allParallelSet & llvm::omp::blockConstructSet) .test(directive.v)) { bool outerCombined = directive.v != llvm::omp::Directive::OMPD_target_parallel; genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList, outerCombined); combinedDirective = true; } if ((llvm::omp::workShareSet & llvm::omp::blockConstructSet) .test(directive.v)) { genSingleOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList, endClauseList); combinedDirective = true; } if (!combinedDirective) TODO(currentLocation, "Unhandled block directive (" + llvm::omp::getOpenMPDirectiveName(directive.v) + ")"); genNestedEvaluations(converter, eval); } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPCriticalConstruct &criticalConstruct) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::Location currentLocation = converter.getCurrentLocation(); mlir::IntegerAttr hintClauseOp; std::string name; const Fortran::parser::OmpCriticalDirective &cd = std::get(criticalConstruct.t); if (std::get>(cd.t).has_value()) { name = std::get>(cd.t).value().ToString(); } const auto &clauseList = std::get(cd.t); ClauseProcessor(converter, semaCtx, clauseList).processHint(hintClauseOp); mlir::omp::CriticalOp criticalOp = [&]() { if (name.empty()) { return firOpBuilder.create( currentLocation, mlir::FlatSymbolRefAttr()); } mlir::ModuleOp module = firOpBuilder.getModule(); mlir::OpBuilder modBuilder(module.getBodyRegion()); auto global = module.lookupSymbol(name); if (!global) global = modBuilder.create( currentLocation, mlir::StringAttr::get(firOpBuilder.getContext(), name), hintClauseOp); return firOpBuilder.create( currentLocation, mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), global.getSymName())); }(); auto genInfo = OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval); createBodyOfOp(criticalOp, genInfo); } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPExecutableAllocate &execAllocConstruct) { TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate"); } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { const auto &beginLoopDirective = std::get(loopConstruct.t); const auto &loopOpClauseList = std::get(beginLoopDirective.t); mlir::Location currentLocation = converter.genLocation(beginLoopDirective.source); const auto ompDirective = std::get(beginLoopDirective.t).v; const auto *endClauseList = [&]() { using RetTy = const Fortran::parser::OmpClauseList *; if (auto &endLoopDirective = std::get>( loopConstruct.t)) { return RetTy( &std::get((*endLoopDirective).t)); } return RetTy(); }(); bool validDirective = false; if (llvm::omp::topTaskloopSet.test(ompDirective)) { validDirective = true; TODO(currentLocation, "Taskloop construct"); } else { // Create omp.{target, teams, distribute, parallel} nested operations if ((llvm::omp::allTargetSet & llvm::omp::loopConstructSet) .test(ompDirective)) { validDirective = true; genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, loopOpClauseList, ompDirective, /*outerCombined=*/true); } if ((llvm::omp::allTeamsSet & llvm::omp::loopConstructSet) .test(ompDirective)) { validDirective = true; genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, loopOpClauseList, /*outerCombined=*/true); } if (llvm::omp::allDistributeSet.test(ompDirective)) { validDirective = true; TODO(currentLocation, "Distribute construct"); } if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet) .test(ompDirective)) { validDirective = true; genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, currentLocation, loopOpClauseList, /*outerCombined=*/true); } } if ((llvm::omp::allDoSet | llvm::omp::allSimdSet).test(ompDirective)) validDirective = true; if (!validDirective) { TODO(currentLocation, "Unhandled loop directive (" + llvm::omp::getOpenMPDirectiveName(ompDirective) + ")"); } if (llvm::omp::allDoSimdSet.test(ompDirective)) { // 2.9.3.2 Workshare SIMD construct createSimdWsloop(converter, semaCtx, eval, ompDirective, loopOpClauseList, endClauseList, currentLocation); } else if (llvm::omp::allSimdSet.test(ompDirective)) { // 2.9.3.1 SIMD construct createSimdLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, currentLocation); genOpenMPReduction(converter, semaCtx, loopOpClauseList); } else { createWsloop(converter, semaCtx, eval, ompDirective, loopOpClauseList, endClauseList, currentLocation); } } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPSectionConstruct §ionConstruct) { // SECTION constructs are handled as a part of SECTIONS. llvm_unreachable("Unexpected standalone OMP SECTION"); } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPSectionsConstruct §ionsConstruct) { mlir::Location currentLocation = converter.getCurrentLocation(); llvm::SmallVector allocateOperands, allocatorOperands; mlir::UnitAttr nowaitClauseOperand; const auto &beginSectionsDirective = std::get(sectionsConstruct.t); const auto §ionsClauseList = std::get(beginSectionsDirective.t); // Process clauses before optional omp.parallel, so that new variables are // allocated outside of the parallel region ClauseProcessor cp(converter, semaCtx, sectionsClauseList); cp.processSectionsReduction(currentLocation); cp.processAllocate(allocatorOperands, allocateOperands); llvm::omp::Directive dir = std::get(beginSectionsDirective.t) .v; // Parallel wrapper of PARALLEL SECTIONS construct if (dir == llvm::omp::Directive::OMPD_parallel_sections) { genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, currentLocation, sectionsClauseList, /*outerCombined=*/true); } else { const auto &endSectionsDirective = std::get(sectionsConstruct.t); const auto &endSectionsClauseList = std::get(endSectionsDirective.t); ClauseProcessor(converter, semaCtx, endSectionsClauseList) .processNowait(nowaitClauseOperand); } // SECTIONS construct genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(false), /*reduction_vars=*/mlir::ValueRange(), /*reductions=*/nullptr, allocateOperands, allocatorOperands, nowaitClauseOperand); const auto §ionBlocks = std::get(sectionsConstruct.t); auto &firOpBuilder = converter.getFirOpBuilder(); auto ip = firOpBuilder.saveInsertionPoint(); for (const auto &[nblock, neval] : llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) { symTable.pushScope(); genSectionOp(converter, semaCtx, neval, /*genNested=*/true, currentLocation, sectionsClauseList); symTable.popScope(); firOpBuilder.restoreInsertionPoint(ip); } } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPStandaloneConstruct &standaloneConstruct) { std::visit( Fortran::common::visitors{ [&](const Fortran::parser::OpenMPSimpleStandaloneConstruct &simpleStandaloneConstruct) { genOmpSimpleStandalone(converter, semaCtx, eval, /*genNested=*/true, simpleStandaloneConstruct); }, [&](const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { genOmpFlush(converter, semaCtx, eval, flushConstruct); }, [&](const Fortran::parser::OpenMPCancelConstruct &cancelConstruct) { TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); }, [&](const Fortran::parser::OpenMPCancellationPointConstruct &cancellationPointConstruct) { TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); }, }, standaloneConstruct.u); } static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPConstruct &ompConstruct) { std::visit( [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, ompConstruct.u); } //===----------------------------------------------------------------------===// // Public functions //===----------------------------------------------------------------------===// mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, mlir::Operation *op, mlir::Location loc) { if (mlir::isa(op)) return builder.create(loc); else return builder.create(loc); } void Fortran::lower::genOpenMPConstruct( Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPConstruct &omp) { symTable.pushScope(); genOMP(converter, symTable, semaCtx, eval, omp); symTable.popScope(); } void Fortran::lower::genOpenMPDeclarativeConstruct( Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclarativeConstruct &omp) { genOMP(converter, symTable, semaCtx, eval, omp); genNestedEvaluations(converter, eval); } void Fortran::lower::genOpenMPSymbolProperties( Fortran::lower::AbstractConverter &converter, const Fortran::lower::pft::Variable &var) { assert(var.hasSymbol() && "Expecting Symbol"); const Fortran::semantics::Symbol &sym = var.getSymbol(); if (sym.test(Fortran::semantics::Symbol::Flag::OmpThreadprivate)) Fortran::lower::genThreadprivateOp(converter, var); if (sym.test(Fortran::semantics::Symbol::Flag::OmpDeclareTarget)) Fortran::lower::genDeclareTargetIntGlobal(converter, var); } int64_t Fortran::lower::getCollapseValue( const Fortran::parser::OmpClauseList &clauseList) { for (const Fortran::parser::OmpClause &clause : clauseList.v) { if (const auto &collapseClause = std::get_if(&clause.u)) { const auto *expr = Fortran::semantics::GetExpr(collapseClause->v); return Fortran::evaluate::ToInt64(*expr).value(); } } return 1; } void Fortran::lower::genThreadprivateOp( Fortran::lower::AbstractConverter &converter, const Fortran::lower::pft::Variable &var) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::Location currentLocation = converter.getCurrentLocation(); const Fortran::semantics::Symbol &sym = var.getSymbol(); mlir::Value symThreadprivateValue; if (const Fortran::semantics::Symbol *common = Fortran::semantics::FindCommonBlockContaining(sym.GetUltimate())) { mlir::Value commonValue = converter.getSymbolAddress(*common); if (mlir::isa(commonValue.getDefiningOp())) { // Generate ThreadprivateOp for a common block instead of its members and // only do it once for a common block. return; } // Generate ThreadprivateOp and rebind the common block. mlir::Value commonThreadprivateValue = firOpBuilder.create( currentLocation, commonValue.getType(), commonValue); converter.bindSymbol(*common, commonThreadprivateValue); // Generate the threadprivate value for the common block member. symThreadprivateValue = genCommonBlockMember(converter, currentLocation, sym, commonThreadprivateValue); } else if (!var.isGlobal()) { // Non-global variable which can be in threadprivate directive must be one // variable in main program, and it has implicit SAVE attribute. Take it as // with SAVE attribute, so to create GlobalOp for it to simplify the // translation to LLVM IR. // Avoids performing multiple globalInitializations. fir::GlobalOp global; auto module = converter.getModuleOp(); std::string globalName = converter.mangleName(sym); if (module.lookupSymbol(globalName)) global = module.lookupSymbol(globalName); else global = globalInitialization(converter, firOpBuilder, sym, var, currentLocation); mlir::Value symValue = firOpBuilder.create( currentLocation, global.resultType(), global.getSymbol()); symThreadprivateValue = firOpBuilder.create( currentLocation, symValue.getType(), symValue); } else { mlir::Value symValue = converter.getSymbolAddress(sym); // The symbol may be use-associated multiple times, and nothing needs to be // done after the original symbol is mapped to the threadprivatized value // for the first time. Use the threadprivatized value directly. mlir::Operation *op; if (auto declOp = symValue.getDefiningOp()) op = declOp.getMemref().getDefiningOp(); else op = symValue.getDefiningOp(); if (mlir::isa(op)) return; symThreadprivateValue = firOpBuilder.create( currentLocation, symValue.getType(), symValue); } fir::ExtendedValue sexv = converter.getSymbolExtendedValue(sym); fir::ExtendedValue symThreadprivateExv = getExtendedValue(sexv, symThreadprivateValue); converter.bindSymbol(sym, symThreadprivateExv); } // This function replicates threadprivate's behaviour of generating // an internal fir.GlobalOp for non-global variables in the main program // that have the implicit SAVE attribute, to simplifiy LLVM-IR and MLIR // generation. void Fortran::lower::genDeclareTargetIntGlobal( Fortran::lower::AbstractConverter &converter, const Fortran::lower::pft::Variable &var) { if (!var.isGlobal()) { // A non-global variable which can be in a declare target directive must // be a variable in the main program, and it has the implicit SAVE // attribute. We create a GlobalOp for it to simplify the translation to // LLVM IR. globalInitialization(converter, converter.getFirOpBuilder(), var.getSymbol(), var, converter.getCurrentLocation()); } } bool Fortran::lower::isOpenMPTargetConstruct( const Fortran::parser::OpenMPConstruct &omp) { llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown; if (const auto *block = std::get_if(&omp.u)) { const auto &begin = std::get(block->t); dir = std::get(begin.t).v; } else if (const auto *loop = std::get_if(&omp.u)) { const auto &begin = std::get(loop->t); dir = std::get(begin.t).v; } return llvm::omp::allTargetSet.test(dir); } void Fortran::lower::gatherOpenMPDeferredDeclareTargets( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclarativeConstruct &ompDecl, llvm::SmallVectorImpl &deferredDeclareTarget) { std::visit( Fortran::common::visitors{ [&](const Fortran::parser::OpenMPDeclareTargetConstruct &ompReq) { collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq, deferredDeclareTarget); }, [&](const auto &) {}, }, ompDecl.u); } bool Fortran::lower::isOpenMPDeviceDeclareTarget( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclarativeConstruct &ompDecl) { return std::visit( Fortran::common::visitors{ [&](const Fortran::parser::OpenMPDeclareTargetConstruct &ompReq) { mlir::omp::DeclareTargetDeviceType targetType = getDeclareTargetFunctionDevice(converter, semaCtx, eval, ompReq) .value_or(mlir::omp::DeclareTargetDeviceType::host); return targetType != mlir::omp::DeclareTargetDeviceType::host; }, [&](const auto &) { return false; }, }, ompDecl.u); } // In certain cases such as subroutine or function interfaces which declare // but do not define or directly call the subroutine or function in the same // module, their lowering is delayed until after the declare target construct // itself is processed, so there symbol is not within the table. // // This function will also return true if we encounter any device declare // target cases, to satisfy checking if we require the requires attributes // on the module. bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions( mlir::Operation *mod, llvm::SmallVectorImpl &deferredDeclareTargets, AbstractConverter &converter) { bool deviceCodeFound = false; auto modOp = llvm::cast(mod); for (auto declTar : deferredDeclareTargets) { mlir::Operation *op = modOp.lookupSymbol(converter.mangleName(declTar.sym)); // Due to interfaces being optionally emitted on usage in a module, // not finding an operation at this point cannot be a hard error, we // simply ignore it for now. // TODO: Add semantic checks for detecting cases where an erronous // (undefined) symbol has been supplied to a declare target clause if (!op) continue; auto devType = declTar.declareTargetDeviceType; if (!deviceCodeFound && devType != mlir::omp::DeclareTargetDeviceType::host) deviceCodeFound = true; markDeclareTarget(op, converter, declTar.declareTargetCaptureClause, devType); } return deviceCodeFound; } void Fortran::lower::genOpenMPRequires( mlir::Operation *mod, const Fortran::semantics::Symbol *symbol) { using MlirRequires = mlir::omp::ClauseRequires; using SemaRequires = Fortran::semantics::WithOmpDeclarative::RequiresFlag; if (auto offloadMod = llvm::dyn_cast(mod)) { Fortran::semantics::WithOmpDeclarative::RequiresFlags semaFlags; if (symbol) { Fortran::common::visit( [&](const auto &details) { if constexpr (std::is_base_of_v< Fortran::semantics::WithOmpDeclarative, std::decay_t>) { if (details.has_ompRequires()) semaFlags = *details.ompRequires(); } }, symbol->details()); } MlirRequires mlirFlags = MlirRequires::none; if (semaFlags.test(SemaRequires::ReverseOffload)) mlirFlags = mlirFlags | MlirRequires::reverse_offload; if (semaFlags.test(SemaRequires::UnifiedAddress)) mlirFlags = mlirFlags | MlirRequires::unified_address; if (semaFlags.test(SemaRequires::UnifiedSharedMemory)) mlirFlags = mlirFlags | MlirRequires::unified_shared_memory; if (semaFlags.test(SemaRequires::DynamicAllocators)) mlirFlags = mlirFlags | MlirRequires::dynamic_allocators; offloadMod.setRequires(mlirFlags); } }