//===-- OpenMP.cpp -- Open MP directive lowering --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//

#include "flang/Lower/OpenMP.h"

#include "ClauseProcessor.h"
#include "Clauses.h"
#include "DataSharingProcessor.h"
#include "Decomposer.h"
#include "ReductionProcessor.h"
#include "Utils.h"
#include "flang/Common/idioms.h"
#include "flang/Lower/Bridge.h"
#include "flang/Lower/ConvertExpr.h"
#include "flang/Lower/ConvertVariable.h"
#include "flang/Lower/DirectivesCommon.h"
#include "flang/Lower/StatementContext.h"
#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/openmp-directive-sets.h"
#include "flang/Semantics/tools.h"
#include "flang/Support/OpenMP-utils.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"

using namespace Fortran::lower::omp;
using namespace Fortran::common::openmp;

//===----------------------------------------------------------------------===//
// Code generation helper functions
//===----------------------------------------------------------------------===//

static void genOMPDispatch(lower::AbstractConverter &converter,
                           lower::SymMap &symTable,
                           semantics::SemanticsContext &semaCtx,
                           lower::pft::Evaluation &eval, mlir::Location loc,
                           const ConstructQueue &queue,
                           ConstructQueue::const_iterator item);

static void processHostEvalClauses(lower::AbstractConverter &converter,
                                   semantics::SemanticsContext &semaCtx,
                                   lower::StatementContext &stmtCtx,
                                   lower::pft::Evaluation &eval,
                                   mlir::Location loc);

namespace {
/// Structure holding information that is needed to pass host-evaluated
/// information to later lowering stages.
class HostEvalInfo {
public:
  // Allow this function access to private members in order to initialize them.
  friend void ::processHostEvalClauses(lower::AbstractConverter &,
                                       semantics::SemanticsContext &,
                                       lower::StatementContext &,
                                       lower::pft::Evaluation &,
                                       mlir::Location);

  /// Fill \c vars with values stored in \c ops.
  ///
  /// The order in which values are stored matches the one expected by \see
  /// bindOperands().
  void collectValues(llvm::SmallVectorImpl<mlir::Value> &vars) const {
    vars.append(ops.loopLowerBounds);
    vars.append(ops.loopUpperBounds);
    vars.append(ops.loopSteps);

    if (ops.numTeamsLower)
      vars.push_back(ops.numTeamsLower);

    if (ops.numTeamsUpper)
      vars.push_back(ops.numTeamsUpper);

    if (ops.numThreads)
      vars.push_back(ops.numThreads);

    if (ops.threadLimit)
      vars.push_back(ops.threadLimit);
  }
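  // Typical round trip (an illustrative sketch, not lifted verbatim from any
  // one caller): values gathered here become the `host_eval` operands of an
  // `omp.target` operation, and are later rebound to that operation's entry
  // block arguments (`argIface` stands for the op's block-arg interface at a
  // real call site):
  //
  //   llvm::SmallVector<mlir::Value> hostEvalVars;
  //   hostEvalInfo.back().collectValues(hostEvalVars);   // -> omp.target
  //   hostEvalInfo.back().bindOperands(argIface.getHostEvalBlockArgs());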
  /// Update \c ops, replacing all values with the corresponding block argument
  /// in \c args.
  ///
  /// The order in which values are stored in \c args is the same as the one
  /// used by \see collectValues().
  void bindOperands(llvm::ArrayRef<mlir::BlockArgument> args) {
    assert(args.size() ==
               ops.loopLowerBounds.size() + ops.loopUpperBounds.size() +
                   ops.loopSteps.size() + (ops.numTeamsLower ? 1 : 0) +
                   (ops.numTeamsUpper ? 1 : 0) + (ops.numThreads ? 1 : 0) +
                   (ops.threadLimit ? 1 : 0) &&
           "invalid block argument list");
    int argIndex = 0;
    for (size_t i = 0; i < ops.loopLowerBounds.size(); ++i)
      ops.loopLowerBounds[i] = args[argIndex++];

    for (size_t i = 0; i < ops.loopUpperBounds.size(); ++i)
      ops.loopUpperBounds[i] = args[argIndex++];

    for (size_t i = 0; i < ops.loopSteps.size(); ++i)
      ops.loopSteps[i] = args[argIndex++];

    if (ops.numTeamsLower)
      ops.numTeamsLower = args[argIndex++];

    if (ops.numTeamsUpper)
      ops.numTeamsUpper = args[argIndex++];

    if (ops.numThreads)
      ops.numThreads = args[argIndex++];

    if (ops.threadLimit)
      ops.threadLimit = args[argIndex++];
  }

  /// Update \p clauseOps and \p ivOut with the corresponding host-evaluated
  /// values and Fortran symbols, respectively, if they have already been
  /// initialized but not yet applied.
  ///
  /// \returns whether an update was performed. If not, these clauses were not
  /// evaluated in the host device.
  bool apply(mlir::omp::LoopNestOperands &clauseOps,
             llvm::SmallVectorImpl<const semantics::Symbol *> &ivOut) {
    if (iv.empty() || loopNestApplied) {
      loopNestApplied = true;
      return false;
    }

    loopNestApplied = true;
    clauseOps.loopLowerBounds = ops.loopLowerBounds;
    clauseOps.loopUpperBounds = ops.loopUpperBounds;
    clauseOps.loopSteps = ops.loopSteps;
    ivOut.append(iv);
    return true;
  }

  /// Update \p clauseOps with the corresponding host-evaluated values if they
  /// have already been initialized but not yet applied.
  ///
  /// \returns whether an update was performed. If not, these clauses were not
  /// evaluated in the host device.
  bool apply(mlir::omp::ParallelOperands &clauseOps) {
    if (!ops.numThreads || parallelApplied) {
      parallelApplied = true;
      return false;
    }

    parallelApplied = true;
    clauseOps.numThreads = ops.numThreads;
    return true;
  }

  /// Update \p clauseOps with the corresponding host-evaluated values if they
  /// have already been initialized.
  ///
  /// \returns whether an update was performed. If not, these clauses were not
  /// evaluated in the host device.
  bool apply(mlir::omp::TeamsOperands &clauseOps) {
    if (!ops.numTeamsLower && !ops.numTeamsUpper && !ops.threadLimit)
      return false;

    clauseOps.numTeamsLower = ops.numTeamsLower;
    clauseOps.numTeamsUpper = ops.numTeamsUpper;
    clauseOps.threadLimit = ops.threadLimit;
    return true;
  }

private:
  mlir::omp::HostEvaluatedOperands ops;
  llvm::SmallVector<const semantics::Symbol *> iv;
  bool loopNestApplied = false, parallelApplied = false;
};
} // namespace

/// Stack of \see HostEvalInfo to represent the current nest of \c omp.target
/// operations being created.
///
/// The current implementation prevents nested 'target' regions from breaking
/// the handling of the outer region by keeping a stack of information
/// structures, but it will probably still require some further work to support
/// reverse offloading.
static llvm::SmallVector<HostEvalInfo, 0> hostEvalInfo;

/// Bind symbols to their corresponding entry block arguments.
///
/// The binding will be performed inside of the current block, which does not
/// necessarily have to be part of the operation for which the binding is done.
/// However, block arguments must be accessible. This enables controlling the
/// insertion point of any new MLIR operations related to the binding of
/// arguments of a loop wrapper operation.
///
/// \param [in] converter - PFT to MLIR conversion interface.
/// \param [in] op - owner operation of the block arguments to bind.
/// \param [in] args - entry block arguments information for the given
///                    operation.
static void bindEntryBlockArgs(lower::AbstractConverter &converter,
                               mlir::omp::BlockArgOpenMPOpInterface op,
                               const EntryBlockArgs &args) {
  assert(op != nullptr && "invalid block argument-defining operation");
  assert(args.isValid() && "invalid args");
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

  auto bindSingleMapLike = [&converter, &firOpBuilder](
                               const semantics::Symbol &sym,
                               const mlir::BlockArgument &arg) {
    // Clones the `bounds` placing them inside the entry block and returns
    // them.
    auto cloneBound = [&](mlir::Value bound) {
      if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
        mlir::Operation *clonedOp = firOpBuilder.clone(*bound.getDefiningOp());
        return clonedOp->getResult(0);
      }
      TODO(converter.getCurrentLocation(),
           "target map-like clause operand unsupported bound type");
    };

    auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) {
      llvm::SmallVector<mlir::Value> clonedBounds;
      llvm::transform(bounds, std::back_inserter(clonedBounds),
                      [&](mlir::Value bound) { return cloneBound(bound); });
      return clonedBounds;
    };

    fir::ExtendedValue extVal = converter.getSymbolExtendedValue(sym);
    auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType());
    if (refType && fir::isa_builtin_cptr_type(refType.getElementType())) {
      converter.bindSymbol(sym, arg);
    } else {
      extVal.match(
          [&](const fir::BoxValue &v) {
            converter.bindSymbol(sym,
                                 fir::BoxValue(arg, cloneBounds(v.getLBounds()),
                                               v.getExplicitParameters(),
                                               v.getExplicitExtents()));
          },
          [&](const fir::MutableBoxValue &v) {
            converter.bindSymbol(
                sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()),
                                          v.getMutableProperties()));
          },
          [&](const fir::ArrayBoxValue &v) {
            converter.bindSymbol(
                sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()),
                                        cloneBounds(v.getLBounds()),
                                        v.getSourceBox()));
          },
          [&](const fir::CharArrayBoxValue &v) {
            converter.bindSymbol(
                sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()),
                                            cloneBounds(v.getExtents()),
                                            cloneBounds(v.getLBounds())));
          },
          [&](const fir::CharBoxValue &v) {
            converter.bindSymbol(
                sym, fir::CharBoxValue(arg, cloneBound(v.getLen())));
          },
          [&](const fir::UnboxedValue &v) { converter.bindSymbol(sym, arg); },
          [&](const auto &) {
            TODO(converter.getCurrentLocation(),
                 "target map clause operand unsupported type");
          });
    }
  };

  auto bindMapLike = [&bindSingleMapLike](
                         llvm::ArrayRef<const semantics::Symbol *> syms,
                         llvm::ArrayRef<mlir::BlockArgument> args) {
    // Structure component symbols don't have bindings, and can only be
    // explicitly mapped individually. If a member is captured implicitly
    // we map the entirety of the derived type when we find its symbol.
    llvm::SmallVector<const semantics::Symbol *> processedSyms;
    llvm::copy_if(syms, std::back_inserter(processedSyms),
                  [](auto *sym) { return !sym->owner().IsDerivedType(); });

    for (auto [sym, arg] : llvm::zip_equal(processedSyms, args))
      bindSingleMapLike(*sym, arg);
  };

  auto bindPrivateLike = [&converter, &firOpBuilder](
                             llvm::ArrayRef<const semantics::Symbol *> syms,
                             llvm::ArrayRef<mlir::Value> vars,
                             llvm::ArrayRef<mlir::BlockArgument> args) {
    llvm::SmallVector<const semantics::Symbol *> processedSyms;
    for (auto *sym : syms) {
      if (const auto *commonDet =
              sym->detailsIf<semantics::CommonBlockDetails>()) {
        llvm::transform(commonDet->objects(), std::back_inserter(processedSyms),
                        [&](const auto &mem) { return &*mem; });
      } else {
        processedSyms.push_back(sym);
      }
    }

    for (auto [sym, var, arg] : llvm::zip_equal(processedSyms, vars, args))
      converter.bindSymbol(
          *sym,
          hlfir::translateToExtendedValue(
              var.getLoc(), firOpBuilder, hlfir::Entity{arg},
              /*contiguousHint=*/
              evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext()))
              .first);
  };

  // Process in clause name alphabetical order to match block arguments order.
  // Do not bind host_eval variables because they cannot be used inside of the
  // corresponding region, except for very specific cases handled separately.
  bindMapLike(args.hasDeviceAddr.syms, op.getHasDeviceAddrBlockArgs());
  bindPrivateLike(args.inReduction.syms, args.inReduction.vars,
                  op.getInReductionBlockArgs());
  bindMapLike(args.map.syms, op.getMapBlockArgs());
  bindPrivateLike(args.priv.syms, args.priv.vars, op.getPrivateBlockArgs());
  bindPrivateLike(args.reduction.syms, args.reduction.vars,
                  op.getReductionBlockArgs());
  bindPrivateLike(args.taskReduction.syms, args.taskReduction.vars,
                  op.getTaskReductionBlockArgs());
  bindMapLike(args.useDeviceAddr.syms, op.getUseDeviceAddrBlockArgs());
  bindMapLike(args.useDevicePtr.syms, op.getUseDevicePtrBlockArgs());
}

/// Get the list of base values that the specified map-like variables point to.
///
/// This function must be kept in sync with changes to the `createMapInfoOp`
/// utility function, since it must take into account the potential
/// introduction of levels of indirection (i.e. intermediate ops).
///
/// \param [in] vars - list of values passed to map-like clauses, returned
///                    by an `omp.map.info` operation.
/// \param [out] baseOps - populated with the `var_ptr` values of the
///                        corresponding defining operations.
static void
extractMappedBaseValues(llvm::ArrayRef<mlir::Value> vars,
                        llvm::SmallVectorImpl<mlir::Value> &baseOps) {
  llvm::transform(vars, std::back_inserter(baseOps), [](mlir::Value map) {
    auto mapInfo = map.getDefiningOp<mlir::omp::MapInfoOp>();
    assert(mapInfo && "expected all map vars to be defined by omp.map.info");

    mlir::Value varPtr = mapInfo.getVarPtr();
    if (auto boxAddr = varPtr.getDefiningOp<fir::BoxAddrOp>())
      return boxAddr.getVal();

    return varPtr;
  });
}

/// Get the directive enumeration value corresponding to the given OpenMP
/// construct PFT node.
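///
/// For example, a `!$omp parallel ... !$omp end parallel` block construct
/// yields \c llvm::omp::OMPD_parallel.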
llvm::omp::Directive
extractOmpDirective(const parser::OpenMPConstruct &ompConstruct) {
  return common::visit(
      common::visitors{
          [](const parser::OpenMPAllocatorsConstruct &c) {
            return llvm::omp::OMPD_allocators;
          },
          [](const parser::OpenMPAssumeConstruct &c) {
            return llvm::omp::OMPD_assume;
          },
          [](const parser::OpenMPAtomicConstruct &c) {
            return llvm::omp::OMPD_atomic;
          },
          [](const parser::OpenMPBlockConstruct &c) {
            return std::get<parser::OmpBlockDirective>(
                       std::get<parser::OmpBeginBlockDirective>(c.t).t)
                .v;
          },
          [](const parser::OpenMPCriticalConstruct &c) {
            return llvm::omp::OMPD_critical;
          },
          [](const parser::OpenMPDeclarativeAllocate &c) {
            return llvm::omp::OMPD_allocate;
          },
          [](const parser::OpenMPDispatchConstruct &c) {
            return llvm::omp::OMPD_dispatch;
          },
          [](const parser::OpenMPExecutableAllocate &c) {
            return llvm::omp::OMPD_allocate;
          },
          [](const parser::OpenMPLoopConstruct &c) {
            return std::get<parser::OmpLoopDirective>(
                       std::get<parser::OmpBeginLoopDirective>(c.t).t)
                .v;
          },
          [](const parser::OpenMPSectionConstruct &c) {
            return llvm::omp::OMPD_section;
          },
          [](const parser::OpenMPSectionsConstruct &c) {
            return std::get<parser::OmpSectionsDirective>(
                       std::get<parser::OmpBeginSectionsDirective>(c.t).t)
                .v;
          },
          [](const parser::OpenMPStandaloneConstruct &c) {
            return common::visit(
                common::visitors{
                    [](const parser::OpenMPSimpleStandaloneConstruct &c) {
                      return c.v.DirId();
                    },
                    [](const parser::OpenMPFlushConstruct &c) {
                      return llvm::omp::OMPD_flush;
                    },
                    [](const parser::OpenMPCancelConstruct &c) {
                      return llvm::omp::OMPD_cancel;
                    },
                    [](const parser::OpenMPCancellationPointConstruct &c) {
                      return llvm::omp::OMPD_cancellation_point;
                    },
                    [](const parser::OmpMetadirectiveDirective &c) {
                      return llvm::omp::OMPD_metadirective;
                    },
                    [](const parser::OpenMPDepobjConstruct &c) {
                      return llvm::omp::OMPD_depobj;
                    },
                    [](const parser::OpenMPInteropConstruct &c) {
                      return llvm::omp::OMPD_interop;
                    }},
                c.u);
          },
          [](const parser::OpenMPUtilityConstruct &c) {
            return common::visit(
                common::visitors{[](const parser::OmpErrorDirective &c) {
                                   return llvm::omp::OMPD_error;
                                 },
                                 [](const parser::OmpNothingDirective &c) {
                                   return llvm::omp::OMPD_nothing;
                                 }},
                c.u);
          }},
      ompConstruct.u);
}

/// Populate the global \see hostEvalInfo after processing clauses for the
/// given \p eval OpenMP target construct, or nested constructs, if these must
/// be evaluated outside of the target region per the spec.
///
/// In particular, this will ensure that in 'target teams' and equivalent
/// nested constructs, the \c thread_limit and \c num_teams clauses will be
/// evaluated in the host. Additionally, loop bounds, steps and the \c
/// num_threads clause will also be evaluated in the host if a target SPMD
/// construct is detected (i.e. 'target teams distribute parallel do [simd]' or
/// equivalent nesting).
///
/// The result, stored as a global, is intended to be used to populate the \c
/// host_eval operands of the associated \c omp.target operation, and also to
/// be checked and used by later lowering steps to populate the corresponding
/// operands of the \c omp.teams, \c omp.parallel or \c omp.loop_nest
/// operations.
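///
/// For example (illustrative only), given:
///
///   !$omp target teams distribute parallel do num_threads(n)
///   do i = 1, bound
///     ...
///   end do
///
/// the loop bounds and step as well as the NUM_THREADS expression are
/// evaluated in the host and attached to the \c omp.target operation as
/// \c host_eval operands.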
static void processHostEvalClauses(lower::AbstractConverter &converter,
                                   semantics::SemanticsContext &semaCtx,
                                   lower::StatementContext &stmtCtx,
                                   lower::pft::Evaluation &eval,
                                   mlir::Location loc) {
  // Obtain the list of clauses of the given OpenMP block or loop construct
  // evaluation. Other evaluations passed to this lambda keep `clauses`
  // unchanged.
  auto extractClauses = [&semaCtx](lower::pft::Evaluation &eval,
                                   List<Clause> &clauses) {
    const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
    if (!ompEval)
      return;

    const parser::OmpClauseList *beginClauseList = nullptr;
    const parser::OmpClauseList *endClauseList = nullptr;
    common::visit(
        common::visitors{
            [&](const parser::OpenMPBlockConstruct &ompConstruct) {
              const auto &beginDirective =
                  std::get<parser::OmpBeginBlockDirective>(ompConstruct.t);
              beginClauseList =
                  &std::get<parser::OmpClauseList>(beginDirective.t);
              endClauseList = &std::get<parser::OmpClauseList>(
                  std::get<parser::OmpEndBlockDirective>(ompConstruct.t).t);
            },
            [&](const parser::OpenMPLoopConstruct &ompConstruct) {
              const auto &beginDirective =
                  std::get<parser::OmpBeginLoopDirective>(ompConstruct.t);
              beginClauseList =
                  &std::get<parser::OmpClauseList>(beginDirective.t);

              if (auto &endDirective =
                      std::get<std::optional<parser::OmpEndLoopDirective>>(
                          ompConstruct.t))
                endClauseList =
                    &std::get<parser::OmpClauseList>(endDirective->t);
            },
            [&](const auto &) {}},
        ompEval->u);

    assert(beginClauseList && "expected begin directive");
    clauses.append(makeClauses(*beginClauseList, semaCtx));

    if (endClauseList)
      clauses.append(makeClauses(*endClauseList, semaCtx));
  };

  // Return the directive that is immediately nested inside of the given
  // `parent` evaluation, if it is its only non-end-statement nested evaluation
  // and it represents an OpenMP construct.
  auto extractOnlyOmpNestedDir = [](lower::pft::Evaluation &parent)
      -> std::optional<llvm::omp::Directive> {
    if (!parent.hasNestedEvaluations())
      return std::nullopt;

    llvm::omp::Directive dir;
    auto &nested = parent.getFirstNestedEvaluation();
    if (const auto *ompEval = nested.getIf<parser::OpenMPConstruct>())
      dir = extractOmpDirective(*ompEval);
    else
      return std::nullopt;

    for (auto &sibling : parent.getNestedEvaluations())
      if (&sibling != &nested && !sibling.isEndStmt())
        return std::nullopt;

    return dir;
  };

  // Process the given evaluation assuming it's part of a 'target' construct or
  // captured by one, and store results in the global `hostEvalInfo`.
  std::function<void(lower::pft::Evaluation &, const List<Clause> &)>
      processEval;
  processEval = [&](lower::pft::Evaluation &eval, const List<Clause> &clauses) {
    using namespace llvm::omp;
    ClauseProcessor cp(converter, semaCtx, clauses);

    // Call `processEval` recursively with the immediately nested evaluation
    // and its corresponding clauses if there is a single nested evaluation
    // representing an OpenMP directive that passes the given test.
    auto processSingleNestedIf = [&](llvm::function_ref<bool(Directive)> test) {
      std::optional<Directive> nestedDir = extractOnlyOmpNestedDir(eval);
      if (!nestedDir || !test(*nestedDir))
        return;

      lower::pft::Evaluation &nestedEval = eval.getFirstNestedEvaluation();
      List<Clause> nestedClauses;
      extractClauses(nestedEval, nestedClauses);
      processEval(nestedEval, nestedClauses);
    };

    const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
    if (!ompEval)
      return;

    HostEvalInfo &hostInfo = hostEvalInfo.back();

    switch (extractOmpDirective(*ompEval)) {
    case OMPD_teams_distribute_parallel_do:
    case OMPD_teams_distribute_parallel_do_simd:
      cp.processThreadLimit(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_target_teams_distribute_parallel_do:
    case OMPD_target_teams_distribute_parallel_do_simd:
      cp.processNumTeams(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_distribute_parallel_do:
    case OMPD_distribute_parallel_do_simd:
      cp.processNumThreads(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_distribute:
    case OMPD_distribute_simd:
      cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
      break;

    case OMPD_teams:
      cp.processThreadLimit(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_target_teams:
      cp.processNumTeams(stmtCtx, hostInfo.ops);
      processSingleNestedIf([](Directive nestedDir) {
        return topDistributeSet.test(nestedDir) || topLoopSet.test(nestedDir);
      });
      break;

    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
      cp.processThreadLimit(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
      cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
      cp.processNumTeams(stmtCtx, hostInfo.ops);
      break;

    case OMPD_teams_loop:
      cp.processThreadLimit(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_target_teams_loop:
      cp.processNumTeams(stmtCtx, hostInfo.ops);
      [[fallthrough]];
    case OMPD_loop:
      cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
      break;

    // Standalone 'target' case.
    case OMPD_target: {
      processSingleNestedIf(
          [](Directive nestedDir) { return topTeamsSet.test(nestedDir); });
      break;
    }
    default:
      break;
    }
  };

  assert(!hostEvalInfo.empty() && "expected HOST_EVAL info structure");

  const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
  assert(ompEval &&
         llvm::omp::allTargetSet.test(extractOmpDirective(*ompEval)) &&
         "expected TARGET construct evaluation");
  (void)ompEval;

  // Use the whole list of clauses passed to the construct here, rather than
  // the ones only applied to omp.target.
  List<Clause> clauses;
  extractClauses(eval, clauses);
  processEval(eval, clauses);
}

static lower::pft::Evaluation *
getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) {
  // Return the Evaluation of the innermost collapsed loop, or the current one
  // if there was no COLLAPSE.
  if (collapseValue == 0)
    return &eval;

  lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation();
  for (int i = 1; i < collapseValue; i++) {
    // The nested evaluations should be DoConstructs (i.e. they should form
    // a loop nest). Each DoConstruct is a tuple <NonLabelDoStmt, Block,
    // EndDoStmt>.
    assert(curEval->isA<parser::DoConstruct>());
    curEval = &*std::next(curEval->getNestedEvaluations().begin());
  }
  return curEval;
}

static void genNestedEvaluations(lower::AbstractConverter &converter,
                                 lower::pft::Evaluation &eval,
                                 int collapseValue = 0) {
  lower::pft::Evaluation *curEval = getCollapsedLoopEval(eval, collapseValue);

  for (lower::pft::Evaluation &e : curEval->getNestedEvaluations())
    converter.genEval(e);
}

static fir::GlobalOp globalInitialization(lower::AbstractConverter &converter,
                                          fir::FirOpBuilder &firOpBuilder,
                                          const semantics::Symbol &sym,
                                          const lower::pft::Variable &var,
                                          mlir::Location currentLocation) {
  std::string globalName = converter.mangleName(sym);
  mlir::StringAttr linkage = firOpBuilder.createInternalLinkage();
  return Fortran::lower::defineGlobal(converter, var, globalName, linkage);
}

// Get the extended value for \p val by extracting additional variable
// information from \p base.
static fir::ExtendedValue getExtendedValue(fir::ExtendedValue base,
                                           mlir::Value val) {
  return base.match(
      [&](const fir::MutableBoxValue &box) -> fir::ExtendedValue {
        return fir::MutableBoxValue(val, box.nonDeferredLenParams(), {});
      },
      [&](const auto &) -> fir::ExtendedValue {
        return fir::substBase(base, val);
      });
}

#ifndef NDEBUG
static bool isThreadPrivate(lower::SymbolRef sym) {
  if (const auto *details = sym->detailsIf<semantics::CommonBlockDetails>()) {
    for (const auto &obj : details->objects())
      if (!obj->test(semantics::Symbol::Flag::OmpThreadprivate))
        return false;
    return true;
  }
  return sym->test(semantics::Symbol::Flag::OmpThreadprivate);
}
#endif

static void threadPrivatizeVars(lower::AbstractConverter &converter,
                                lower::pft::Evaluation &eval) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  mlir::Location currentLocation = converter.getCurrentLocation();
  mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
  firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());

  // If the symbol corresponds to the original ThreadprivateOp, use the symbol
  // value from that operation to create one ThreadprivateOp copy operation
  // inside the parallel region.
  // In some cases, however, the symbol will correspond to the original,
  // non-threadprivate variable. This can happen, for instance, with a common
  // block, declared in a separate module, used by a parent procedure and
  // privatized in its child procedure.
  auto genThreadprivateOp = [&](lower::SymbolRef sym) -> mlir::Value {
    assert(isThreadPrivate(sym));
    mlir::Value symValue = converter.getSymbolAddress(sym);
    mlir::Operation *op = symValue.getDefiningOp();
    if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(op))
      op = declOp.getMemref().getDefiningOp();
    if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
      symValue = mlir::dyn_cast<mlir::omp::ThreadprivateOp>(op).getSymAddr();
    return firOpBuilder.create<mlir::omp::ThreadprivateOp>(
        currentLocation, symValue.getType(), symValue);
  };

  llvm::SetVector<const semantics::Symbol *> threadprivateSyms;
  converter.collectSymbolSet(eval, threadprivateSyms,
                             semantics::Symbol::Flag::OmpThreadprivate,
                             /*collectSymbols=*/true,
                             /*collectHostAssociatedSymbols=*/true);
  std::set<semantics::SourceName> threadprivateSymNames;

  // For a COMMON block, the ThreadprivateOp is generated for the block itself
  // instead of its members, so bind the value of the new copied
  // ThreadprivateOp inside the parallel region to the common block symbol only
  // once, even when multiple members of the block are referenced.
  llvm::SetVector<const semantics::Symbol *> commonSyms;
  for (std::size_t i = 0; i < threadprivateSyms.size(); i++) {
    const semantics::Symbol *sym = threadprivateSyms[i];
    mlir::Value symThreadprivateValue;
    // The variable may be referenced more than once, and each reference has
    // its own symbol with the same name. Only process one symbol per variable
    // name.
    if (threadprivateSymNames.find(sym->name()) != threadprivateSymNames.end())
      continue;
    threadprivateSymNames.insert(sym->name());

    if (const semantics::Symbol *common =
            semantics::FindCommonBlockContaining(sym->GetUltimate())) {
      mlir::Value commonThreadprivateValue;
      if (commonSyms.contains(common)) {
        commonThreadprivateValue = converter.getSymbolAddress(*common);
      } else {
        commonThreadprivateValue = genThreadprivateOp(*common);
        converter.bindSymbol(*common, commonThreadprivateValue);
        commonSyms.insert(common);
      }
      symThreadprivateValue = lower::genCommonBlockMember(
          converter, currentLocation, sym->GetUltimate(),
          commonThreadprivateValue);
    } else {
      symThreadprivateValue = genThreadprivateOp(*sym);
    }

    fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*sym);
    fir::ExtendedValue symThreadprivateExv =
        getExtendedValue(sexv, symThreadprivateValue);
    converter.bindSymbol(*sym, symThreadprivateExv);
  }
}

static mlir::Operation *
createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
                              mlir::Location loc, mlir::Value indexVal,
                              const semantics::Symbol *sym) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
  firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());

  mlir::Type tempTy = converter.genType(*sym);

  assert(converter.isPresentShallowLookup(*sym) &&
         "Expected symbol to be in symbol table.");

  firOpBuilder.restoreInsertionPoint(insPt);
  mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal);
  hlfir::Entity lhs{converter.getSymbolAddress(*sym)};

  lhs = hlfir::derefPointersAndAllocatables(loc, firOpBuilder, lhs);

  mlir::Operation *storeOp =
      firOpBuilder.create<fir::StoreOp>(loc, cvtVal, lhs);
  return storeOp;
}

// This helper function implements the functionality of "promoting" non-CPTR
// arguments of use_device_ptr to use_device_addr arguments (automagic
// conversion of use_device_ptr -> use_device_addr in these cases). The way we
// do so currently is through the shuffling of operands from the
// devicePtrOperands to deviceAddrOperands, as well as the types, locations and
// symbols.
//
// This effectively implements some deprecated OpenMP functionality that some
// legacy applications unfortunately depend on (deprecated in specification
// version 5.2):
//
// "If a list item in a use_device_ptr clause is not of type C_PTR, the
// behavior is as if the list item appeared in a use_device_addr clause.
// Support for such list items in a use_device_ptr clause is deprecated."
static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
    llvm::SmallVectorImpl<mlir::Value> &useDeviceAddrVars,
    llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceAddrSyms,
    llvm::SmallVectorImpl<mlir::Value> &useDevicePtrVars,
    llvm::SmallVectorImpl<const semantics::Symbol *> &useDevicePtrSyms) {
  // Iterate over our use_device_ptr list and shift all non-cptr arguments into
  // use_device_addr.
  auto *varIt = useDevicePtrVars.begin();
  auto *symIt = useDevicePtrSyms.begin();
  while (varIt != useDevicePtrVars.end()) {
    if (fir::isa_builtin_cptr_type(fir::unwrapRefType(varIt->getType()))) {
      ++varIt;
      ++symIt;
      continue;
    }

    useDeviceAddrVars.push_back(*varIt);
    useDeviceAddrSyms.push_back(*symIt);

    varIt = useDevicePtrVars.erase(varIt);
    symIt = useDevicePtrSyms.erase(symIt);
  }
}

/// Extract the list of function and variable symbols affected by the given
/// 'declare target' directive and return the intended device type for them.
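///
/// Both spellings of the directive are handled here, for example
/// (illustrative only):
///
///   !$omp declare target (func, var1, var2)        ! extended-list form
///   !$omp declare target enter(func) link(var1)    ! clause form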
static void getDeclareTargetInfo(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval,
    const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
    mlir::omp::DeclareTargetOperands &clauseOps,
    llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
  const auto &spec =
      std::get<parser::OmpDeclareTargetSpecifier>(declareTargetConstruct.t);
  if (const auto *objectList{parser::Unwrap<parser::OmpObjectList>(spec.u)}) {
    ObjectList objects{makeObjects(*objectList, semaCtx)};
    // Case: declare target(func, var1, var2)
    gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to,
                         symbolAndClause);
  } else if (const auto *clauseList{
                 parser::Unwrap<parser::OmpClauseList>(spec.u)}) {
    List<Clause> clauses = makeClauses(*clauseList, semaCtx);
    if (clauses.empty()) {
      Fortran::lower::pft::FunctionLikeUnit *owningProc =
          eval.getOwningProcedure();
      if (owningProc && (!owningProc->isMainProgram() ||
                         owningProc->getMainProgramSymbol())) {
        // Case: declare target, implicit capture of function
        symbolAndClause.emplace_back(mlir::omp::DeclareTargetCaptureClause::to,
                                     owningProc->getSubprogramSymbol());
      }
    }

    ClauseProcessor cp(converter, semaCtx, clauses);
    cp.processDeviceType(clauseOps);
    cp.processEnter(symbolAndClause);
    cp.processLink(symbolAndClause);
    cp.processTo(symbolAndClause);

    cp.processTODO<clause::Indirect>(converter.getCurrentLocation(),
                                     llvm::omp::Directive::OMPD_declare_target);
  }
}

static void collectDeferredDeclareTargets(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval,
    const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
    llvm::SmallVectorImpl<lower::OMPDeferredDeclareTargetInfo>
        &deferredDeclareTarget) {
  mlir::omp::DeclareTargetOperands clauseOps;
  llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
  getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
                       clauseOps, symbolAndClause);
  // Return the device type only if at least one of the targets for the
  // directive is a function or subroutine
  mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();

  for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
    mlir::Operation *op = mod.lookupSymbol(
        converter.mangleName(std::get<const semantics::Symbol &>(symClause)));

    if (!op) {
      deferredDeclareTarget.push_back({std::get<0>(symClause),
                                       clauseOps.deviceType,
                                       std::get<1>(symClause)});
    }
  }
}

static std::optional<mlir::omp::DeclareTargetDeviceType>
getDeclareTargetFunctionDevice(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval,
    const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
  mlir::omp::DeclareTargetOperands clauseOps;
  llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
  getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
                       clauseOps, symbolAndClause);

  // Return the device type only if at least one of the targets for the
  // directive is a function or subroutine
  mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
  for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
    mlir::Operation *op = mod.lookupSymbol(
        converter.mangleName(std::get<const semantics::Symbol &>(symClause)));

    if (mlir::isa_and_nonnull<mlir::func::FuncOp>(op))
      return clauseOps.deviceType;
  }

  return std::nullopt;
}

/// Set up the entry block of the given `omp.loop_nest` operation, adding a
/// block argument for each loop induction variable and allocating and
/// initializing a private value to hold each of them.
///
/// This function can also bind the symbols of any variables that should match
/// block arguments on parent loop wrapper operations attached to the same
/// loop. This allows the introduction of any necessary `hlfir.declare`
/// operations inside of the entry block of the `omp.loop_nest` operation and
/// not directly under any of the wrappers, which would invalidate them.
///
/// \param [in] op - the loop nest operation.
/// \param [in] converter - PFT to MLIR conversion interface.
/// \param [in] loc - location.
/// \param [in] args - symbols of induction variables.
/// \param [in] wrapperArgs - list of parent loop wrappers and their associated
///                           entry block arguments.
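///
/// Illustrative shape of the result for a single worksharing loop (a sketch
/// only; types, clauses and the privatized storage are elided):
///
///   omp.wsloop {
///     omp.loop_nest (%iv) : index = (%lb) to (%ub) inclusive step (%step) {
///       // %iv is converted and stored into the privatized loop variable.
///       ...
///     }
///   }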
static void genLoopVars(
    mlir::Operation *op, lower::AbstractConverter &converter,
    mlir::Location &loc, llvm::ArrayRef<const semantics::Symbol *> args,
    llvm::ArrayRef<
        std::pair<mlir::omp::BlockArgOpenMPOpInterface, const EntryBlockArgs &>>
        wrapperArgs = {}) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  auto &region = op->getRegion(0);

  std::size_t loopVarTypeSize = 0;
  for (const semantics::Symbol *arg : args)
    loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
  mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
  llvm::SmallVector<mlir::Type> tiv(args.size(), loopVarType);
  llvm::SmallVector<mlir::Location> locs(args.size(), loc);
  firOpBuilder.createBlock(&region, {}, tiv, locs);

  // Update nested wrapper operands if parent wrappers have mapped these values
  // to block arguments.
  //
  // Binding these values earlier would take care of this, but we cannot rely
  // on that approach because binding in between the creation of a wrapper and
  // the next one would result in 'hlfir.declare' operations being introduced
  // inside of a wrapper, which is illegal.
  mlir::IRMapping mapper;
  for (auto [argGeneratingOp, blockArgs] : wrapperArgs) {
    for (mlir::OpOperand &operand : argGeneratingOp->getOpOperands())
      operand.set(mapper.lookupOrDefault(operand.get()));

    for (const auto [arg, var] : llvm::zip_equal(
             argGeneratingOp->getRegion(0).getArguments(), blockArgs.getVars()))
      mapper.map(var, arg);
  }

  // Bind the entry block arguments of parent wrappers to the corresponding
  // symbols.
  for (auto [argGeneratingOp, blockArgs] : wrapperArgs)
    bindEntryBlockArgs(converter, argGeneratingOp, blockArgs);

  // The argument is not currently in memory, so make a temporary for the
  // argument, and store it there, then bind that location to the argument.
  mlir::Operation *storeOp = nullptr;
  for (auto [argIndex, argSymbol] : llvm::enumerate(args)) {
    mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex));
    storeOp =
        createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
  }
  firOpBuilder.setInsertionPointAfter(storeOp);
}

static void
markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter,
                  mlir::omp::DeclareTargetCaptureClause captureClause,
                  mlir::omp::DeclareTargetDeviceType deviceType) {
  // TODO: Add support for program local variables with declare target applied
  auto declareTargetOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(op);
  if (!declareTargetOp)
    fir::emitFatalError(
        converter.getCurrentLocation(),
        "Attempt to apply declare target on unsupported operation");

  // The function or global already has a declare target applied to it, very
  // likely through implicit capture (usage in another declare target
  // function/subroutine).
  // It should be marked as 'any' if it has been assigned both host and
  // nohost; otherwise we skip, as there is no change.
  if (declareTargetOp.isDeclareTarget()) {
    if (declareTargetOp.getDeclareTargetDeviceType() != deviceType)
      declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any,
                                       captureClause);
    return;
  }

  declareTargetOp.setDeclareTarget(deviceType, captureClause);
}

//===----------------------------------------------------------------------===//
// Op body generation helper structures and functions
//===----------------------------------------------------------------------===//

struct OpWithBodyGenInfo {
  /// A type for a code-gen callback function. This takes as argument the op
  /// for which the code is being generated and returns the arguments of the
  /// op's region.
  using GenOMPRegionEntryCBFn =
      std::function<llvm::SmallVector<const semantics::Symbol *>(
          mlir::Operation *)>;

  OpWithBodyGenInfo(lower::AbstractConverter &converter,
                    lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx, mlir::Location loc,
                    lower::pft::Evaluation &eval, llvm::omp::Directive dir)
      : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
        eval(eval), dir(dir) {}

  OpWithBodyGenInfo &setClauses(const List<Clause> *value) {
    clauses = value;
    return *this;
  }

  OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) {
    dsp = value;
    return *this;
  }

  OpWithBodyGenInfo &setEntryBlockArgs(const EntryBlockArgs *value) {
    blockArgs = value;
    return *this;
  }

  OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) {
    genRegionEntryCB = value;
    return *this;
  }

  OpWithBodyGenInfo &setGenSkeletonOnly(bool value) {
    genSkeletonOnly = value;
    return *this;
  }

  /// [inout] converter to use for the clauses.
  lower::AbstractConverter &converter;
  /// [in] Symbol table
  lower::SymMap &symTable;
  /// [in] Semantics context
  semantics::SemanticsContext &semaCtx;
  /// [in] location in source code.
  mlir::Location loc;
  /// [in] current PFT node/evaluation.
  lower::pft::Evaluation &eval;
  /// [in] leaf directive for which to generate the op body.
  llvm::omp::Directive dir;
  /// [in] list of clauses to process.
  const List<Clause> *clauses = nullptr;
  /// [in] if provided, processes the construct's data-sharing attributes.
  DataSharingProcessor *dsp = nullptr;
  /// [in] if provided, it is used to create the op's region entry block. It
  /// is overridden when a \see genRegionEntryCB is provided. This is only
  /// valid for operations implementing the \see
  /// mlir::omp::BlockArgOpenMPOpInterface.
  const EntryBlockArgs *blockArgs = nullptr;
  /// [in] if provided, it overrides the default op's region entry block
  /// creation.
  GenOMPRegionEntryCBFn genRegionEntryCB = nullptr;
  /// [in] if set to `true`, skip generating nested evaluations and dispatching
  /// any further leaf constructs.
  bool genSkeletonOnly = false;
};
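// Typical usage (an illustrative sketch only; `item` and `args` stand for a
// construct queue iterator and pre-computed entry block arguments at a real
// call site):
//
//   auto genInfo = OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
//                                    llvm::omp::Directive::OMPD_single)
//                      .setClauses(&item->clauses)
//                      .setEntryBlockArgs(&args);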
/// Create the body (block) for an OpenMP Operation.
///
/// \param [in] op - the operation the body belongs to.
/// \param [in] info - options controlling code-gen for the construction.
/// \param [in] queue - work queue with nested constructs.
/// \param [in] item - item in the queue to generate body for.
static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
                           const ConstructQueue &queue,
                           ConstructQueue::const_iterator item) {
  fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder();

  auto insertMarker = [](fir::FirOpBuilder &builder) {
    mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(),
                                                     builder.getIndexType());
    return undef.getDefiningOp();
  };

  // Create the entry block for the region and collect its arguments for use
  // within the region. The entry block will be created as follows:
  //   - By default, it will be empty and have no arguments.
  //   - Operations implementing the omp::BlockArgOpenMPOpInterface can set the
  //     `info.blockArgs` pointer so that block arguments will be those
  //     corresponding to entry block argument-generating clauses. Binding of
  //     Fortran symbols to the new MLIR values is done automatically.
  //   - If the `info.genRegionEntryCB` callback is set, it takes precedence
  //     and allows callers to manually create the entry block with its
  //     intended list of arguments and to bind these arguments to their
  //     corresponding Fortran symbols. This is used for e.g. loop induction
  //     variables.
  auto regionArgs = [&]() -> llvm::SmallVector<const semantics::Symbol *> {
    if (info.genRegionEntryCB)
      return info.genRegionEntryCB(&op);

    if (info.blockArgs) {
      genEntryBlock(firOpBuilder, *info.blockArgs, op.getRegion(0));
      bindEntryBlockArgs(info.converter,
                         llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op),
                         *info.blockArgs);
      return llvm::to_vector(info.blockArgs->getSyms());
    }

    firOpBuilder.createBlock(&op.getRegion(0));
    return {};
  }();

  // Mark the earliest insertion point.
  mlir::Operation *marker = insertMarker(firOpBuilder);

  // If it is an unstructured region, create empty blocks for all evaluations.
  if (lower::omp::isLastItemInQueue(item, queue) &&
      info.eval.lowerAsUnstructured()) {
    lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp,
                                   mlir::omp::YieldOp>(
        firOpBuilder, info.eval.getNestedEvaluations());
  }

  // Start with privatization, so that the lowering of the nested
  // code will use the right symbols.
  bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) ==
                llvm::omp::Association::Loop;
  bool privatize = info.clauses;

  firOpBuilder.setInsertionPoint(marker);
  std::optional<DataSharingProcessor> tempDsp;
  if (privatize && !info.dsp) {
    tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval,
                    Fortran::lower::omp::isLastItemInQueue(item, queue),
                    /*useDelayedPrivatization=*/false, info.symTable);
    tempDsp->processStep1();
  }

  if (info.dir == llvm::omp::Directive::OMPD_parallel) {
    threadPrivatizeVars(info.converter, info.eval);
    if (info.clauses) {
      firOpBuilder.setInsertionPoint(marker);
      ClauseProcessor(info.converter, info.semaCtx, *info.clauses)
          .processCopyin();
    }
  }

  if (!info.genSkeletonOnly) {
    if (ConstructQueue::const_iterator next = std::next(item);
        next != queue.end()) {
      genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval,
                     info.loc, queue, next);
    } else {
      // genFIR(Evaluation&) tries to patch up unterminated blocks, causing
      // a lot of complications for our approach if the terminator generation
      // is delayed past this point. Insert a temporary terminator here, then
      // delete it.
      firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back());
      auto *temp = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
      firOpBuilder.setInsertionPointAfter(marker);
      genNestedEvaluations(info.converter, info.eval);
      temp->erase();
    }
  }

  // Get or create a unique exiting block from the given region, or
  // return nullptr if there is no exiting block.
  auto getUniqueExit = [&](mlir::Region &region) -> mlir::Block * {
    // Find the blocks where the OMP terminator should go. In simple cases
    // it is the single block in the operation's region. When the region
    // is more complicated, especially with unstructured control flow, there
    // may be multiple blocks, and some of them may have non-OMP terminators
    // resulting from lowering of the code contained within the operation.
    // All the remaining blocks are potential exit points from the op's region.
    //
    // Explicit control flow cannot exit any OpenMP region (other than via
    // STOP), and that is enforced by semantic checks prior to lowering. STOP
    // statements are lowered to a function call.

    // Collect unterminated blocks.
    llvm::SmallVector<mlir::Block *> exits;
    for (mlir::Block &b : region) {
      if (b.empty() || !b.back().hasTrait<mlir::OpTrait::IsTerminator>())
        exits.push_back(&b);
    }

    if (exits.empty())
      return nullptr;
    // If there already is a unique exiting block, do not create another one.
    // Additionally, some ops (e.g. omp.sections) require only a single block
    // in their region.
    if (exits.size() == 1)
      return exits[0];
    mlir::Block *exit = firOpBuilder.createBlock(&region);
    for (mlir::Block *b : exits) {
      firOpBuilder.setInsertionPointToEnd(b);
      firOpBuilder.create<mlir::cf::BranchOp>(info.loc, exit);
    }
    return exit;
  };

  if (auto *exitBlock = getUniqueExit(op.getRegion(0))) {
    firOpBuilder.setInsertionPointToEnd(exitBlock);
    auto *term = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
    // Only insert lastprivate code when there actually is an exit block.
    // Such a block may not exist if the nested code produced an infinite
    // loop (this may not make sense in production code, but a user could
    // write that and we should handle it).
    firOpBuilder.setInsertionPoint(term);
    if (privatize) {
      // DataSharingProcessor::processStep2() may create operations before/
      // after the one passed as argument. We need to treat loop wrappers and
      // their nested loop as a unit, so we need to pass the bottom level
      // wrapper (if present). Otherwise, these operations will be inserted
      // within a wrapper region.
      mlir::Operation *privatizationBottomLevelOp = &op;
      if (auto loopNest = llvm::dyn_cast<mlir::omp::LoopNestOp>(op)) {
        llvm::SmallVector<mlir::omp::LoopWrapperInterface> wrappers;
        loopNest.gatherWrappers(wrappers);
        if (!wrappers.empty())
          privatizationBottomLevelOp = &*wrappers.front();
      }

      if (!info.dsp) {
        assert(tempDsp.has_value());
        tempDsp->processStep2(privatizationBottomLevelOp, isLoop);
      } else {
        if (isLoop && regionArgs.size() > 0) {
          for (const auto &regionArg : regionArgs) {
            info.dsp->pushLoopIV(info.converter.getSymbolAddress(*regionArg));
          }
        }
        info.dsp->processStep2(privatizationBottomLevelOp, isLoop);
      }
    }
  }

  firOpBuilder.setInsertionPointAfter(marker);
  marker->erase();
}

static void genBodyOfTargetDataOp(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    mlir::omp::TargetDataOp &dataOp, const EntryBlockArgs &args,
    const mlir::Location &currentLocation, const ConstructQueue &queue,
    ConstructQueue::const_iterator item) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

  genEntryBlock(firOpBuilder, args, dataOp.getRegion());
  bindEntryBlockArgs(converter, dataOp, args);

  // Insert dummy instruction to remember the insertion position. The
  // marker will be deleted by clean up passes since there are no uses.
  // Remembering the position for further insertion is important since
  // there are hlfir.declares inserted above while setting block arguments
  // and new code from the body should be inserted after that.
  mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
      dataOp.getLoc(), firOpBuilder.getIndexType());

  // Create blocks for unstructured regions. This has to be done since
  // blocks are initially allocated with the function as the parent region.
  if (eval.lowerAsUnstructured()) {
    lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp,
                                   mlir::omp::YieldOp>(
        firOpBuilder, eval.getNestedEvaluations());
  }

  firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);

  // Set the insertion point after the marker.
  firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());

  if (ConstructQueue::const_iterator next = std::next(item);
      next != queue.end()) {
    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                   next);
  } else {
    genNestedEvaluations(converter, eval);
  }
}

// This generates intermediate common block member accesses within a region
// and then rebinds the member symbols to the intermediate accessors we have
// generated so that subsequent code generation will utilise these instead.
//
// When the scope changes, the bindings to the intermediate accessors should
// be dropped in place of the original symbol bindings.
//
// This is for utilisation with TargetOp.
static void genIntermediateCommonBlockAccessors(
    Fortran::lower::AbstractConverter &converter,
    const mlir::Location &currentLocation,
    llvm::ArrayRef<mlir::BlockArgument> mapBlockArgs,
    llvm::ArrayRef<const semantics::Symbol *> mapSyms) {
  // Iterate over the symbol list, which will be shorter than the list of
  // arguments if new entry block arguments were introduced to implicitly map
  // outside values used by the bounds cloned into the target region. In that
  // case, the additional block arguments do not need processing here.
  for (auto [mapSym, mapArg] : llvm::zip_first(mapSyms, mapBlockArgs)) {
    auto *details = mapSym->detailsIf<semantics::CommonBlockDetails>();
    if (!details)
      continue;

    for (auto obj : details->objects()) {
      auto targetCBMemberBind = Fortran::lower::genCommonBlockMember(
          converter, currentLocation, *obj, mapArg);
      fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*obj);
      fir::ExtendedValue targetCBExv =
          getExtendedValue(sexv, targetCBMemberBind);
      converter.bindSymbol(*obj, targetCBExv);
    }
  }
}

// This function creates a block for the body of the targetOp's region. It
// adds all the symbols present in mapSymbols as block arguments to this
// block.
static void genBodyOfTargetOp(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    mlir::omp::TargetOp &targetOp, const EntryBlockArgs &args,
    const mlir::Location &currentLocation, const ConstructQueue &queue,
    ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp);

  mlir::Region &region = targetOp.getRegion();
  mlir::Block *entryBlock = genEntryBlock(firOpBuilder, args, region);
  bindEntryBlockArgs(converter, targetOp, args);
  if (!hostEvalInfo.empty())
    hostEvalInfo.back().bindOperands(argIface.getHostEvalBlockArgs());

  // Check if cloning the bounds introduced any dependency on the outer
  // region. If so, then either clone them as well if they are
  // MemoryEffectFree, or else copy them to a new temporary and add them to
  // the map and block_argument lists and replace their uses with the new
  // temporary.
  llvm::SetVector<mlir::Value> valuesDefinedAbove;
  mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
  while (!valuesDefinedAbove.empty()) {
    for (mlir::Value val : valuesDefinedAbove) {
      mlir::Operation *valOp = val.getDefiningOp();
      assert(valOp != nullptr);

      // NOTE: We skip BoxDimsOp's as the lesser of two evils is to map the
      // indices separately, as the alternative is to eventually map the Box,
      // which comes with a fairly large overhead comparatively.
      // We could be more robust about this and check using a BackwardsSlice
      // to see if we run the risk of mapping a box.
      if (mlir::isMemoryEffectFree(valOp) &&
          !mlir::isa<fir::BoxDimsOp>(valOp)) {
        mlir::Operation *clonedOp = valOp->clone();
        entryBlock->push_front(clonedOp);

        auto replace = [entryBlock](mlir::OpOperand &use) {
          return use.getOwner()->getBlock() == entryBlock;
        };

        valOp->getResults().replaceUsesWithIf(clonedOp->getResults(), replace);
        valOp->replaceUsesWithIf(clonedOp, replace);
      } else {
        auto savedIP = firOpBuilder.getInsertionPoint();
        firOpBuilder.setInsertionPointAfter(valOp);
        auto copyVal =
            firOpBuilder.createTemporary(val.getLoc(), val.getType());
        firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);

        fir::factory::AddrAndBoundsInfo info =
            fir::factory::getDataOperandBaseAddr(
                firOpBuilder, val, /*isOptional=*/false, val.getLoc());
        llvm::SmallVector<mlir::Value> bounds =
            fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
                                               mlir::omp::MapBoundsType>(
                firOpBuilder, info,
                hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder,
                                                hlfir::Entity{val})
                    .first,
                /*dataExvIsAssumedSize=*/false, val.getLoc());

        std::stringstream name;
        firOpBuilder.setInsertionPoint(targetOp);

        llvm::omp::OpenMPOffloadMappingFlags mapFlag =
            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
        mlir::omp::VariableCaptureKind captureKind =
            mlir::omp::VariableCaptureKind::ByRef;

        mlir::Type eleType = copyVal.getType();
        if (auto refType =
                mlir::dyn_cast<fir::ReferenceType>(copyVal.getType()))
          eleType = refType.getElementType();

        if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
          captureKind = mlir::omp::VariableCaptureKind::ByCopy;
        } else if (!fir::isa_builtin_cptr_type(eleType)) {
          mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
        }

        mlir::Value mapOp = createMapInfoOp(
            firOpBuilder, copyVal.getLoc(), copyVal,
            /*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
            /*members=*/llvm::SmallVector<mlir::Value>{},
            /*membersIndex=*/mlir::ArrayAttr{},
            static_cast<
                std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
                mapFlag),
            captureKind, copyVal.getType());

        // Get the index of the first non-map argument before modifying
        // mapVars, then append an element to mapVars and an associated entry
        // block argument at that index.
        unsigned insertIndex =
            argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs();
        targetOp.getMapVarsMutable().append(mapOp);
        mlir::Value clonedValArg = region.insertArgument(
            insertIndex, copyVal.getType(), copyVal.getLoc());

        firOpBuilder.setInsertionPointToStart(entryBlock);
        auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(),
                                                       clonedValArg);
        val.replaceUsesWithIf(loadOp->getResult(0),
                              [entryBlock](mlir::OpOperand &use) {
                                return use.getOwner()->getBlock() == entryBlock;
                              });
        firOpBuilder.setInsertionPoint(entryBlock, savedIP);
      }
    }
    valuesDefinedAbove.clear();
    mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
  }

  // Insert dummy instruction to remember the insertion position. The
  // marker will be deleted since there are no uses.
  // In the HLFIR flow there are hlfir.declares inserted above while
  // setting block arguments.
  mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
      targetOp.getLoc(), firOpBuilder.getIndexType());

  // Create blocks for unstructured regions. This has to be done since
  // blocks are initially allocated with the function as the parent region.
  if (lower::omp::isLastItemInQueue(item, queue) &&
      eval.lowerAsUnstructured()) {
    lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp,
                                   mlir::omp::YieldOp>(
        firOpBuilder, eval.getNestedEvaluations());
  }

  firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);

  // Set the insertion point after the marker.
  firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());

  // If we map a common block using its symbol, e.g.
  // map(tofrom: /common_block/), and access its members within the target
  // region, there is a good chance we will end up with uses external to the
  // region accessing the common block members. To resolve these, we generate
  // new common block member accesses within the region, binding them to the
  // member symbols for the scope of the region, so that subsequent code
  // generation within the region will utilise our new member accesses.
  genIntermediateCommonBlockAccessors(
      converter, currentLocation, argIface.getMapBlockArgs(), args.map.syms);

  if (ConstructQueue::const_iterator next = std::next(item);
      next != queue.end()) {
    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                   next);
  } else {
    genNestedEvaluations(converter, eval);
  }

  dsp.processStep2(targetOp, /*isLoop=*/false);
}

template <typename OpTy, typename... Args>
static OpTy genOpWithBody(const OpWithBodyGenInfo &info,
                          const ConstructQueue &queue,
                          ConstructQueue::const_iterator item,
                          Args &&...args) {
  auto op = info.converter.getFirOpBuilder().create<OpTy>(
      info.loc, std::forward<Args>(args)...);
  createBodyOfOp(*op, info, queue, item);
  return op;
}

template <typename OpTy, typename ClauseOpsTy>
static OpTy genWrapperOp(lower::AbstractConverter &converter,
                         mlir::Location loc, const ClauseOpsTy &clauseOps,
                         const EntryBlockArgs &args) {
  static_assert(
      OpTy::template hasTrait<mlir::omp::LoopWrapperInterface::Trait>(),
      "expected a loop wrapper");
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

  // Create wrapper.
  auto op = firOpBuilder.create<OpTy>(loc, clauseOps);

  // Create entry block with arguments.
  genEntryBlock(firOpBuilder, args, op.getRegion());

  return op;
}
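// Example call (an illustrative sketch; the clause operands and entry block
// arguments are assumed to have been computed earlier at a real call site):
//
//   auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
//       converter, loc, wsloopClauseOps, wsloopArgs);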
//===----------------------------------------------------------------------===//
// Code generation functions for clauses
//===----------------------------------------------------------------------===//

static void genCancelClauses(lower::AbstractConverter &converter,
                             semantics::SemanticsContext &semaCtx,
                             const List<Clause> &clauses, mlir::Location loc,
                             mlir::omp::CancelOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processCancelDirectiveName(clauseOps);
  cp.processIf(llvm::omp::Directive::OMPD_cancel, clauseOps);
}

static void
genCancellationPointClauses(lower::AbstractConverter &converter,
                            semantics::SemanticsContext &semaCtx,
                            const List<Clause> &clauses, mlir::Location loc,
                            mlir::omp::CancellationPointOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processCancelDirectiveName(clauseOps);
}

static void genCriticalDeclareClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    const List<Clause> &clauses, mlir::Location loc,
    mlir::omp::CriticalDeclareOperands &clauseOps, llvm::StringRef name) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processHint(clauseOps);
  clauseOps.symName =
      mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name);
}

static void genDistributeClauses(lower::AbstractConverter &converter,
                                 semantics::SemanticsContext &semaCtx,
                                 lower::StatementContext &stmtCtx,
                                 const List<Clause> &clauses,
                                 mlir::Location loc,
                                 mlir::omp::DistributeOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processAllocate(clauseOps);
  cp.processDistSchedule(stmtCtx, clauseOps);
  cp.processOrder(clauseOps);
}

static void genFlushClauses(lower::AbstractConverter &converter,
                            semantics::SemanticsContext &semaCtx,
                            const ObjectList &objects,
                            const List<Clause> &clauses, mlir::Location loc,
                            llvm::SmallVectorImpl<mlir::Value> &operandRange) {
  if (!objects.empty())
    genObjectList(objects, converter, operandRange);

  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processTODO<clause::AcqRel, clause::Acquire, clause::Release,
                 clause::SeqCst>(loc, llvm::omp::OMPD_flush);
}

static void
genLoopNestClauses(lower::AbstractConverter &converter,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval, const List<Clause> &clauses,
                   mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps,
                   llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
  ClauseProcessor cp(converter, semaCtx, clauses);

  if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps, iv))
    cp.processCollapse(loc, eval, clauseOps, iv);

  clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr();
}

static void genLoopClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    const List<Clause> &clauses, mlir::Location loc,
    mlir::omp::LoopOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processBind(clauseOps);
  cp.processOrder(clauseOps);
  cp.processReduction(loc, clauseOps, reductionSyms);
  cp.processTODO<clause::Lastprivate>(loc, llvm::omp::Directive::OMPD_loop);
}

static void genMaskedClauses(lower::AbstractConverter &converter,
                             semantics::SemanticsContext &semaCtx,
                             lower::StatementContext &stmtCtx,
                             const List<Clause> &clauses, mlir::Location loc,
                             mlir::omp::MaskedOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processFilter(stmtCtx, clauseOps);
}

static void
genOrderedRegionClauses(lower::AbstractConverter &converter,
                        semantics::SemanticsContext &semaCtx,
                        const List<Clause> &clauses, mlir::Location loc,
                        mlir::omp::OrderedRegionOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processTODO<clause::Simd>(loc, llvm::omp::Directive::OMPD_ordered);
}

static void genParallelClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::StatementContext &stmtCtx, const List<Clause> &clauses,
    mlir::Location loc, mlir::omp::ParallelOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processAllocate(clauseOps);
  cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps);

  if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps))
    cp.processNumThreads(stmtCtx, clauseOps);

  cp.processProcBind(clauseOps);
  cp.processReduction(loc, clauseOps, reductionSyms);
}

static void genScanClauses(lower::AbstractConverter &converter,
                           semantics::SemanticsContext &semaCtx,
                           const List<Clause> &clauses, mlir::Location loc,
                           mlir::omp::ScanOperands &clauseOps) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processInclusive(loc, clauseOps);
  cp.processExclusive(loc, clauseOps);
}

static void genSectionsClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    const List<Clause> &clauses, mlir::Location loc,
    mlir::omp::SectionsOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processAllocate(clauseOps);
  cp.processNowait(clauseOps);
  cp.processReduction(loc, clauseOps, reductionSyms);
  // TODO Support delayed privatization.
} static void genSimdClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, mlir::Location loc, mlir::omp::SimdOperands &clauseOps, llvm::SmallVectorImpl &reductionSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAligned(clauseOps); cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps); cp.processNontemporal(clauseOps); cp.processOrder(clauseOps); cp.processReduction(loc, clauseOps, reductionSyms); cp.processSafelen(clauseOps); cp.processSimdlen(clauseOps); cp.processTODO(loc, llvm::omp::Directive::OMPD_simd); } static void genSingleClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, mlir::Location loc, mlir::omp::SingleOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processCopyprivate(loc, clauseOps); cp.processNowait(clauseOps); // TODO Support delayed privatization. } static void genTargetClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::SymMap &symTable, lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval, const List &clauses, mlir::Location loc, mlir::omp::TargetOperands &clauseOps, llvm::SmallVectorImpl &hasDeviceAddrSyms, llvm::SmallVectorImpl &isDevicePtrSyms, llvm::SmallVectorImpl &mapSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processBare(clauseOps); cp.processDepend(symTable, stmtCtx, clauseOps); cp.processDevice(stmtCtx, clauseOps); cp.processHasDeviceAddr(stmtCtx, clauseOps, hasDeviceAddrSyms); if (!hostEvalInfo.empty()) { // Only process host_eval if compiling for the host device. processHostEvalClauses(converter, semaCtx, stmtCtx, eval, loc); hostEvalInfo.back().collectValues(clauseOps.hostEvalVars); } cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps); cp.processIsDevicePtr(clauseOps, isDevicePtrSyms); cp.processMap(loc, stmtCtx, clauseOps, &mapSyms); cp.processNowait(clauseOps); cp.processThreadLimit(stmtCtx, clauseOps); cp.processTODO(loc, llvm::omp::Directive::OMPD_target); // `target private(..)` is only supported in delayed privatization mode. if (!enableDelayedPrivatizationStaging) cp.processTODO( loc, llvm::omp::Directive::OMPD_target); } static void genTargetDataClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::TargetDataOperands &clauseOps, llvm::SmallVectorImpl &useDeviceAddrSyms, llvm::SmallVectorImpl &useDevicePtrSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processDevice(stmtCtx, clauseOps); cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps); cp.processMap(loc, stmtCtx, clauseOps); cp.processUseDeviceAddr(stmtCtx, clauseOps, useDeviceAddrSyms); cp.processUseDevicePtr(stmtCtx, clauseOps, useDevicePtrSyms); // This function implements the deprecated functionality of use_device_ptr // that allows users to provide non-CPTR arguments to it with the caveat // that the compiler will treat them as use_device_addr. A lot of legacy // code may still depend on this functionality, so we should support it // in some manner. We do so currently by simply shifting non-cptr operands // from the use_device_ptr lists into the use_device_addr lists. // TODO: Perhaps create a user provideable compiler option that will // re-introduce a hard-error rather than a warning in these cases. 
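  // For example (illustrative): given
  //
  //   !$omp target data map(tofrom: a) use_device_ptr(a)
  //
  // where `a` is not of TYPE(C_PTR), `a` is shifted below from the
  // use_device_ptr operand/symbol lists into the use_device_addr lists, as if
  // use_device_addr(a) had been specified instead.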
promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( clauseOps.useDeviceAddrVars, useDeviceAddrSyms, clauseOps.useDevicePtrVars, useDevicePtrSyms); } static void genTargetEnterExitUpdateDataClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::SymMap &symTable, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, llvm::omp::Directive directive, mlir::omp::TargetEnterExitUpdateDataOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processDepend(symTable, stmtCtx, clauseOps); cp.processDevice(stmtCtx, clauseOps); cp.processIf(directive, clauseOps); if (directive == llvm::omp::Directive::OMPD_target_update) cp.processMotionClauses(stmtCtx, clauseOps); else cp.processMap(loc, stmtCtx, clauseOps); cp.processNowait(clauseOps); } static void genTaskClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::SymMap &symTable, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::TaskOperands &clauseOps, llvm::SmallVectorImpl &inReductionSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processDepend(symTable, stmtCtx, clauseOps); cp.processFinal(stmtCtx, clauseOps); cp.processIf(llvm::omp::Directive::OMPD_task, clauseOps); cp.processInReduction(loc, clauseOps, inReductionSyms); cp.processMergeable(clauseOps); cp.processPriority(stmtCtx, clauseOps); cp.processUntied(clauseOps); cp.processDetach(clauseOps); cp.processTODO(loc, llvm::omp::Directive::OMPD_task); } static void genTaskgroupClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, mlir::Location loc, mlir::omp::TaskgroupOperands &clauseOps, llvm::SmallVectorImpl &taskReductionSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processTaskReduction(loc, clauseOps, taskReductionSyms); } static void genTaskloopClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::TaskloopOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processGrainsize(stmtCtx, clauseOps); cp.processNumTasks(stmtCtx, clauseOps); cp.processTODO(loc, llvm::omp::Directive::OMPD_taskloop); } static void genTaskwaitClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, mlir::Location loc, mlir::omp::TaskwaitOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processTODO( loc, llvm::omp::Directive::OMPD_taskwait); } static void genWorkshareClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::WorkshareOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processNowait(clauseOps); } static void genTeamsClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::TeamsOperands &clauseOps, llvm::SmallVectorImpl &reductionSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps); if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps)) { cp.processNumTeams(stmtCtx, clauseOps); cp.processThreadLimit(stmtCtx, clauseOps); } cp.processReduction(loc, clauseOps, reductionSyms); // 
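  // Note: when this TEAMS construct is nested in a TARGET region compiled for
  // the host, its num_teams/thread_limit expressions have already been
  // evaluated outside the region (see processHostEvalClauses), so apply()
  // above binds those host-evaluated values rather than re-lowering the
  // expressions here.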
// TODO Support delayed privatization.
}

static void genWsloopClauses(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::StatementContext &stmtCtx, const List<Clause> &clauses,
    mlir::Location loc, mlir::omp::WsloopOperands &clauseOps,
    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processNowait(clauseOps);
  cp.processOrder(clauseOps);
  cp.processOrdered(clauseOps);
  cp.processReduction(loc, clauseOps, reductionSyms);
  cp.processSchedule(stmtCtx, clauseOps);

  cp.processTODO(loc, llvm::omp::Directive::OMPD_do);
}

//===----------------------------------------------------------------------===//
// Code generation functions for leaf constructs
//===----------------------------------------------------------------------===//

static mlir::omp::BarrierOp
genBarrierOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
             semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
             mlir::Location loc, const ConstructQueue &queue,
             ConstructQueue::const_iterator item) {
  return converter.getFirOpBuilder().create<mlir::omp::BarrierOp>(loc);
}

static mlir::omp::CancelOp
genCancelOp(lower::AbstractConverter &converter,
            semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
            mlir::Location loc, const ConstructQueue &queue,
            ConstructQueue::const_iterator item) {
  mlir::omp::CancelOperands clauseOps;
  genCancelClauses(converter, semaCtx, item->clauses, loc, clauseOps);

  return converter.getFirOpBuilder().create<mlir::omp::CancelOp>(loc,
                                                                 clauseOps);
}

static mlir::omp::CancellationPointOp genCancellationPointOp(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  mlir::omp::CancellationPointOperands clauseOps;
  genCancellationPointClauses(converter, semaCtx, item->clauses, loc,
                              clauseOps);

  return converter.getFirOpBuilder().create<mlir::omp::CancellationPointOp>(
      loc, clauseOps);
}

static mlir::omp::CriticalOp
genCriticalOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
              semantics::SemanticsContext &semaCtx,
              lower::pft::Evaluation &eval, mlir::Location loc,
              const ConstructQueue &queue, ConstructQueue::const_iterator item,
              const std::optional<parser::Name> &name) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  mlir::FlatSymbolRefAttr nameAttr;

  if (name) {
    std::string nameStr = name->ToString();
    mlir::ModuleOp mod = firOpBuilder.getModule();
    auto global = mod.lookupSymbol<mlir::omp::CriticalDeclareOp>(nameStr);
    if (!global) {
      mlir::omp::CriticalDeclareOperands clauseOps;
      genCriticalDeclareClauses(converter, semaCtx, item->clauses, loc,
                                clauseOps, nameStr);

      mlir::OpBuilder modBuilder(mod.getBodyRegion());
      global = modBuilder.create<mlir::omp::CriticalDeclareOp>(loc, clauseOps);
    }
    nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(),
                                            global.getSymName());
  }

  return genOpWithBody<mlir::omp::CriticalOp>(
      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                        llvm::omp::Directive::OMPD_critical),
      queue, item, nameAttr);
}

static mlir::omp::FlushOp
genFlushOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
           semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
           mlir::Location loc, const ObjectList &objects,
           const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  llvm::SmallVector<mlir::Value> operandRange;
  genFlushClauses(converter, semaCtx, objects, item->clauses, loc,
                  operandRange);

  return converter.getFirOpBuilder().create<mlir::omp::FlushOp>(
      converter.getCurrentLocation(), operandRange);
}

static mlir::omp::LoopNestOp genLoopNestOp(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, mlir::omp::LoopNestOperands &clauseOps, llvm::ArrayRef iv, llvm::ArrayRef< std::pair> wrapperArgs, llvm::omp::Directive directive, DataSharingProcessor &dsp) { auto ivCallback = [&](mlir::Operation *op) { genLoopVars(op, converter, loc, iv, wrapperArgs); return llvm::SmallVector(iv); }; auto *nestedEval = getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) .setClauses(&item->clauses) .setDataSharingProcessor(&dsp) .setGenRegionEntryCb(ivCallback), queue, item, clauseOps); } static mlir::omp::LoopOp genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::LoopOperands loopClauseOps; llvm::SmallVector loopReductionSyms; genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, loopReductionSyms); DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(&loopClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, loopNestClauseOps, iv); EntryBlockArgs loopArgs; loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); loopArgs.priv.vars = loopClauseOps.privateVars; loopArgs.reduction.syms = loopReductionSyms; loopArgs.reduction.vars = loopClauseOps.reductionVars; auto loopOp = genWrapperOp(converter, loc, loopClauseOps, loopArgs); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, loopNestClauseOps, iv, {{loopOp, loopArgs}}, llvm::omp::Directive::OMPD_loop, dsp); return loopOp; } static mlir::omp::MaskedOp genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::MaskedOperands clauseOps; genMaskedClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_masked), queue, item, clauseOps); } static mlir::omp::MasterOp genMasterOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_master), queue, item); } static mlir::omp::OrderedOp genOrderedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { TODO(loc, "OMPD_ordered"); return nullptr; } static mlir::omp::OrderedRegionOp genOrderedRegionOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::OrderedRegionOperands clauseOps; 
genOrderedRegionClauses(converter, semaCtx, item->clauses, loc, clauseOps);

  return genOpWithBody<mlir::omp::OrderedRegionOp>(
      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                        llvm::omp::Directive::OMPD_ordered),
      queue, item, clauseOps);
}

static mlir::omp::ParallelOp
genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
              semantics::SemanticsContext &semaCtx,
              lower::pft::Evaluation &eval, mlir::Location loc,
              const ConstructQueue &queue, ConstructQueue::const_iterator item,
              mlir::omp::ParallelOperands &clauseOps,
              const EntryBlockArgs &args, DataSharingProcessor *dsp,
              bool isComposite = false) {
  assert((!enableDelayedPrivatization || dsp) &&
         "expected valid DataSharingProcessor");

  OpWithBodyGenInfo genInfo =
      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                        llvm::omp::Directive::OMPD_parallel)
          .setClauses(&item->clauses)
          .setEntryBlockArgs(&args)
          .setGenSkeletonOnly(isComposite)
          .setDataSharingProcessor(dsp);

  auto parallelOp =
      genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item, clauseOps);
  parallelOp.setComposite(isComposite);
  return parallelOp;
}

static mlir::omp::ScanOp
genScanOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
          semantics::SemanticsContext &semaCtx, mlir::Location loc,
          const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  mlir::omp::ScanOperands clauseOps;
  genScanClauses(converter, semaCtx, item->clauses, loc, clauseOps);
  return converter.getFirOpBuilder().create<mlir::omp::ScanOp>(
      converter.getCurrentLocation(), clauseOps);
}

/// This breaks the normal prototype of the gen*Op functions: adding the
/// sectionBlocks argument so that the enclosed section constructs can be
/// lowered here with correct reduction symbol remapping.
static mlir::omp::SectionsOp
genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
              semantics::SemanticsContext &semaCtx,
              lower::pft::Evaluation &eval, mlir::Location loc,
              const ConstructQueue &queue, ConstructQueue::const_iterator item,
              const parser::OmpSectionBlocks &sectionBlocks) {
  mlir::omp::SectionsOperands clauseOps;
  llvm::SmallVector<const semantics::Symbol *> reductionSyms;
  genSectionsClauses(converter, semaCtx, item->clauses, loc, clauseOps,
                     reductionSyms);

  auto &builder = converter.getFirOpBuilder();

  // Insert privatizations before SECTIONS.
  lower::SymMapScope scope(symTable);
  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                           lower::omp::isLastItemInQueue(item, queue),
                           /*useDelayedPrivatization=*/false, symTable);
  dsp.processStep1();

  List<Clause> nonDsaClauses;
  List<const clause::Lastprivate *> lastprivates;

  for (const Clause &clause : item->clauses) {
    if (clause.id == llvm::omp::Clause::OMPC_lastprivate) {
      auto &lastp = std::get<clause::Lastprivate>(clause.u);
      lastprivateModifierNotSupported(lastp, converter.getCurrentLocation());
      lastprivates.push_back(&lastp);
    } else {
      switch (clause.id) {
      case llvm::omp::Clause::OMPC_firstprivate:
      case llvm::omp::Clause::OMPC_private:
      case llvm::omp::Clause::OMPC_shared:
        break;
      default:
        nonDsaClauses.push_back(clause);
      }
    }
  }

  // SECTIONS construct.
  auto sectionsOp = builder.create<mlir::omp::SectionsOp>(loc, clauseOps);

  // Create entry block with reduction variables as arguments.
  EntryBlockArgs args;
  // TODO: Add private syms and vars.
  args.reduction.syms = reductionSyms;
  args.reduction.vars = clauseOps.reductionVars;
  genEntryBlock(builder, args, sectionsOp.getRegion());
  mlir::Operation *terminator =
      lower::genOpenMPTerminator(builder, sectionsOp, loc);

  // Generate nested SECTION constructs.
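  // The overall shape of the lowered construct (illustrative, details
  // elided) is:
  //
  //   omp.sections {
  //     omp.section { ... }
  //     omp.section { ... }
  //     omp.terminator
  //   }
  //
  // with one omp.section region generated per SECTION block below.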
// This is done here rather than in genOMP([...], OpenMPSectionConstruct)
  // because we need to run genReductionVars on each omp.section so that the
  // reduction variable gets mapped to the private version.
  for (auto [construct, nestedEval] :
       llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
    const auto *sectionConstruct =
        std::get_if<parser::OpenMPSectionConstruct>(&construct.u);
    if (!sectionConstruct) {
      assert(false &&
             "unexpected construct nested inside of SECTIONS construct");
      continue;
    }

    ConstructQueue sectionQueue{buildConstructQueue(
        converter.getFirOpBuilder().getModule(), semaCtx, nestedEval,
        sectionConstruct->source, llvm::omp::Directive::OMPD_section, {})};

    builder.setInsertionPoint(terminator);
    genOpWithBody<mlir::omp::SectionOp>(
        OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
                          llvm::omp::Directive::OMPD_section)
            .setClauses(&sectionQueue.begin()->clauses)
            .setDataSharingProcessor(&dsp)
            .setEntryBlockArgs(&args),
        sectionQueue, sectionQueue.begin());
  }

  if (!lastprivates.empty()) {
    mlir::Region &sectionsBody = sectionsOp.getRegion();
    assert(sectionsBody.hasOneBlock());
    mlir::Block &body = sectionsBody.front();

    auto lastSectionOp = llvm::find_if(
        llvm::reverse(body.getOperations()), [](const mlir::Operation &op) {
          return llvm::isa<mlir::omp::SectionOp>(op);
        });
    assert(lastSectionOp != body.rend());

    for (const clause::Lastprivate *lastp : lastprivates) {
      builder.setInsertionPoint(
          lastSectionOp->getRegion(0).back().getTerminator());
      mlir::OpBuilder::InsertPoint insp = builder.saveInsertionPoint();
      const auto &objList = std::get<ObjectList>(lastp->t);
      for (const Object &object : objList) {
        semantics::Symbol *sym = object.sym();
        if (const auto *common =
                sym->detailsIf<semantics::CommonBlockDetails>()) {
          for (const auto &obj : common->objects())
            converter.copyHostAssociateVar(*obj, &insp,
                                           /*hostIsSource=*/false);
        } else {
          converter.copyHostAssociateVar(*sym, &insp, /*hostIsSource=*/false);
        }
      }
    }
  }

  // Perform DataSharingProcessor's step2 outside of the SECTIONS construct.
  builder.setInsertionPointAfter(sectionsOp.getOperation());
  dsp.processStep2(sectionsOp, /*isLoop=*/false);

  // Emit an implicit barrier to synchronize threads and avoid data races on
  // post-update of lastprivate variables when the `nowait` clause is present.
if (clauseOps.nowait && !lastprivates.empty()) builder.create(loc); return sectionsOp; } static mlir::Operation * genScopeOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { TODO(loc, "Scope construct"); return nullptr; } static mlir::omp::SingleOp genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::SingleOperands clauseOps; genSingleClauses(converter, semaCtx, item->clauses, loc, clauseOps); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_single) .setClauses(&item->clauses), queue, item, clauseOps); } static mlir::omp::TargetOp genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); bool isTargetDevice = llvm::cast(*converter.getModuleOp()) .getIsTargetDevice(); // Introduce a new host_eval information structure for this target region. if (!isTargetDevice) hostEvalInfo.emplace_back(); mlir::omp::TargetOperands clauseOps; llvm::SmallVector mapSyms, isDevicePtrSyms, hasDeviceAddrSyms; genTargetClauses(converter, semaCtx, symTable, stmtCtx, eval, item->clauses, loc, clauseOps, hasDeviceAddrSyms, isDevicePtrSyms, mapSyms); DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/ lower::omp::isLastItemInQueue(item, queue), /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(&clauseOps); // Check if a value of type `type` can be passed to the kernel by value. // All kernel parameters are of pointer type, so if the value can be // represented inside of a pointer, then it can be passed by value. auto isLiteralType = [&](mlir::Type type) { const mlir::DataLayout &dl = firOpBuilder.getDataLayout(); mlir::Type ptrTy = mlir::LLVM::LLVMPointerType::get(&converter.getMLIRContext()); uint64_t ptrSize = dl.getTypeSize(ptrTy); uint64_t ptrAlign = dl.getTypePreferredAlignment(ptrTy); auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash( loc, type, dl, converter.getKindMap()); return size <= ptrSize && align <= ptrAlign; }; // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the // symbols used inside the region that do not have explicit data-environment // attribute clauses (neither data-sharing; e.g. `private`, nor `map` // clauses). auto captureImplicitMap = [&](const semantics::Symbol &sym) { if (dsp.getAllSymbolsToPrivatize().contains(&sym)) return; // These symbols are mapped individually in processHasDeviceAddr. if (llvm::is_contained(hasDeviceAddrSyms, &sym)) return; // Structure component symbols don't have bindings, and can only be // explicitly mapped individually. If a member is captured implicitly // we map the entirety of the derived type when we find its symbol. if (sym.owner().IsDerivedType()) return; // if the symbol is part of an already mapped common block, do not make a // map for it. 
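    // For example (illustrative): with map(tofrom: /blk/) mapping an entire
    // common block, a member of /blk/ referenced in the region gets no
    // implicit map of its own; its accesses are rebound to the mapped block
    // by genIntermediateCommonBlockAccessors instead.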
if (const Fortran::semantics::Symbol *common =
            Fortran::semantics::FindCommonBlockContaining(sym.GetUltimate()))
      if (llvm::is_contained(mapSyms, common))
        return;

    // If we come across a symbol without a symbol address, we return early
    // because we cannot process it. This is intended as a catch-all early
    // exit for symbols that do not have a corresponding extended value, such
    // as subroutines, interfaces and named blocks.
    if (!converter.getSymbolAddress(sym))
      return;

    if (!llvm::is_contained(mapSyms, &sym)) {
      if (const auto *details =
              sym.template detailsIf<semantics::HostAssocDetails>())
        converter.copySymbolBinding(details->symbol(), sym);
      std::stringstream name;
      fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym);
      name << sym.name().ToString();

      mlir::FlatSymbolRefAttr mapperId;
      if (sym.GetType()->category() == semantics::DeclTypeSpec::TypeDerived) {
        auto &typeSpec = sym.GetType()->derivedTypeSpec();
        std::string mapperIdName = typeSpec.name().ToString() + ".default";
        mapperIdName = converter.mangleName(mapperIdName, *typeSpec.GetScope());
        if (converter.getModuleOp().lookupSymbol(mapperIdName))
          mapperId = mlir::FlatSymbolRefAttr::get(&converter.getMLIRContext(),
                                                  mapperIdName);
      }

      fir::factory::AddrAndBoundsInfo info =
          Fortran::lower::getDataOperandBaseAddr(
              converter, firOpBuilder, sym.GetUltimate(),
              converter.getCurrentLocation());
      llvm::SmallVector<mlir::Value> bounds =
          fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
                                             mlir::omp::MapBoundsType>(
              firOpBuilder, info, dataExv,
              semantics::IsAssumedSizeArray(sym.GetUltimate()),
              converter.getCurrentLocation());

      llvm::omp::OpenMPOffloadMappingFlags mapFlag =
          llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
      mlir::omp::VariableCaptureKind captureKind =
          mlir::omp::VariableCaptureKind::ByRef;

      mlir::Value baseOp = info.rawInput;
      mlir::Type eleType = baseOp.getType();
      if (auto refType = mlir::dyn_cast<fir::ReferenceType>(baseOp.getType()))
        eleType = refType.getElementType();

      // If a variable is specified in declare target link and if device
      // type is not specified as `nohost`, it needs to be mapped tofrom.
      mlir::ModuleOp mod = firOpBuilder.getModule();
      mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym));
      auto declareTargetOp =
          llvm::dyn_cast_if_present<mlir::omp::DeclareTargetInterface>(op);
      if (declareTargetOp && declareTargetOp.isDeclareTarget()) {
        if (declareTargetOp.getDeclareTargetCaptureClause() ==
                mlir::omp::DeclareTargetCaptureClause::link &&
            declareTargetOp.getDeclareTargetDeviceType() !=
                mlir::omp::DeclareTargetDeviceType::nohost) {
          mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
          mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
        }
      } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
        // Scalars behave as if they were "firstprivate".
        // TODO: Handle objects that are shared/lastprivate or were listed
        // in an in_reduction clause.
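        // For example (illustrative): a default INTEGER scalar fits in a
        // pointer-sized kernel argument and is captured ByCopy below, while a
        // CHARACTER(LEN=64) scalar does not, so it remains ByRef and is given
        // a "to" mapping.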
if (isLiteralType(eleType)) { captureKind = mlir::omp::VariableCaptureKind::ByCopy; } else { mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; } } else if (!fir::isa_builtin_cptr_type(eleType)) { mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; } auto location = mlir::NameLoc::get(mlir::StringAttr::get(firOpBuilder.getContext(), sym.name().ToString()), baseOp.getLoc()); mlir::Value mapOp = createMapInfoOp( firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, /*members=*/{}, /*membersIndex=*/mlir::ArrayAttr{}, static_cast< std::underlying_type_t>( mapFlag), captureKind, baseOp.getType(), /*partialMap=*/false, mapperId); clauseOps.mapVars.push_back(mapOp); mapSyms.push_back(&sym); } }; lower::pft::visitAllSymbols(eval, captureImplicitMap); auto targetOp = firOpBuilder.create(loc, clauseOps); llvm::SmallVector hasDeviceAddrBaseValues, mapBaseValues; extractMappedBaseValues(clauseOps.hasDeviceAddrVars, hasDeviceAddrBaseValues); extractMappedBaseValues(clauseOps.mapVars, mapBaseValues); EntryBlockArgs args; args.hasDeviceAddr.syms = hasDeviceAddrSyms; args.hasDeviceAddr.vars = hasDeviceAddrBaseValues; args.hostEvalVars = clauseOps.hostEvalVars; // TODO: Add in_reduction syms and vars. args.map.syms = mapSyms; args.map.vars = mapBaseValues; args.priv.syms = dsp.getDelayedPrivSymbols(); args.priv.vars = clauseOps.privateVars; genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, args, loc, queue, item, dsp); // Remove the host_eval information structure created for this target region. if (!isTargetDevice) hostEvalInfo.pop_back(); return targetOp; } static mlir::omp::TargetDataOp genTargetDataOp( lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::TargetDataOperands clauseOps; llvm::SmallVector useDeviceAddrSyms, useDevicePtrSyms; genTargetDataClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps, useDeviceAddrSyms, useDevicePtrSyms); auto targetDataOp = converter.getFirOpBuilder().create(loc, clauseOps); llvm::SmallVector useDeviceAddrBaseValues, useDevicePtrBaseValues; extractMappedBaseValues(clauseOps.useDeviceAddrVars, useDeviceAddrBaseValues); extractMappedBaseValues(clauseOps.useDevicePtrVars, useDevicePtrBaseValues); EntryBlockArgs args; args.useDeviceAddr.syms = useDeviceAddrSyms; args.useDeviceAddr.vars = useDeviceAddrBaseValues; args.useDevicePtr.syms = useDevicePtrSyms; args.useDevicePtr.vars = useDevicePtrBaseValues; genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp, args, loc, queue, item); return targetDataOp; } template static OpTy genTargetEnterExitUpdateDataOp( lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // GCC 9.3.0 emits a (probably) bogus warning about an unused variable. 
[[maybe_unused]] llvm::omp::Directive directive; if constexpr (std::is_same_v) { directive = llvm::omp::Directive::OMPD_target_enter_data; } else if constexpr (std::is_same_v) { directive = llvm::omp::Directive::OMPD_target_exit_data; } else if constexpr (std::is_same_v) { directive = llvm::omp::Directive::OMPD_target_update; } else { llvm_unreachable("Unexpected TARGET DATA construct"); } mlir::omp::TargetEnterExitUpdateDataOperands clauseOps; genTargetEnterExitUpdateDataClauses(converter, semaCtx, symTable, stmtCtx, item->clauses, loc, directive, clauseOps); return firOpBuilder.create(loc, clauseOps); } static mlir::omp::TaskOp genTaskOp(lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::TaskOperands clauseOps; llvm::SmallVector inReductionSyms; genTaskClauses(converter, semaCtx, symTable, stmtCtx, item->clauses, loc, clauseOps, inReductionSyms); if (!enableDelayedPrivatization) return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_task) .setClauses(&item->clauses), queue, item, clauseOps); DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, lower::omp::isLastItemInQueue(item, queue), /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(&clauseOps); EntryBlockArgs taskArgs; taskArgs.priv.syms = dsp.getDelayedPrivSymbols(); taskArgs.priv.vars = clauseOps.privateVars; taskArgs.inReduction.syms = inReductionSyms; taskArgs.inReduction.vars = clauseOps.inReductionVars; return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_task) .setClauses(&item->clauses) .setDataSharingProcessor(&dsp) .setEntryBlockArgs(&taskArgs), queue, item, clauseOps); } static mlir::omp::TaskgroupOp genTaskgroupOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::TaskgroupOperands clauseOps; llvm::SmallVector taskReductionSyms; genTaskgroupClauses(converter, semaCtx, item->clauses, loc, clauseOps, taskReductionSyms); EntryBlockArgs taskgroupArgs; taskgroupArgs.taskReduction.syms = taskReductionSyms; taskgroupArgs.taskReduction.vars = clauseOps.taskReductionVars; return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_taskgroup) .setClauses(&item->clauses) .setEntryBlockArgs(&taskgroupArgs), queue, item, clauseOps); } static mlir::omp::TaskwaitOp genTaskwaitOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::TaskwaitOperands clauseOps; genTaskwaitClauses(converter, semaCtx, item->clauses, loc, clauseOps); return converter.getFirOpBuilder().create(loc, clauseOps); } static mlir::omp::TaskyieldOp genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { return converter.getFirOpBuilder().create(loc); } static mlir::omp::WorkshareOp genWorkshareOp( lower::AbstractConverter &converter, lower::SymMap &symTable, 
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::WorkshareOperands clauseOps; genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_workshare) .setClauses(&item->clauses), queue, item, clauseOps); } static mlir::omp::TeamsOp genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::TeamsOperands clauseOps; llvm::SmallVector reductionSyms; genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps, reductionSyms); EntryBlockArgs args; // TODO: Add private syms and vars. args.reduction.syms = reductionSyms; args.reduction.vars = clauseOps.reductionVars; return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_teams) .setClauses(&item->clauses) .setEntryBlockArgs(&args), queue, item, clauseOps); } //===----------------------------------------------------------------------===// // Code generation for atomic operations //===----------------------------------------------------------------------===// /// Populates \p hint and \p memoryOrder with appropriate clause information /// if present on atomic construct. static void genOmpAtomicHintAndMemoryOrderClauses( lower::AbstractConverter &converter, const parser::OmpAtomicClauseList &clauseList, mlir::IntegerAttr &hint, mlir::omp::ClauseMemoryOrderKindAttr &memoryOrder) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); for (const parser::OmpAtomicClause &clause : clauseList.v) { common::visit( common::visitors{ [&](const parser::OmpMemoryOrderClause &s) { auto kind = common::visit( common::visitors{ [&](const parser::OmpClause::AcqRel &) { return mlir::omp::ClauseMemoryOrderKind::Acq_rel; }, [&](const parser::OmpClause::Acquire &) { return mlir::omp::ClauseMemoryOrderKind::Acquire; }, [&](const parser::OmpClause::Relaxed &) { return mlir::omp::ClauseMemoryOrderKind::Relaxed; }, [&](const parser::OmpClause::Release &) { return mlir::omp::ClauseMemoryOrderKind::Release; }, [&](const parser::OmpClause::SeqCst &) { return mlir::omp::ClauseMemoryOrderKind::Seq_cst; }, [&](auto &&) -> mlir::omp::ClauseMemoryOrderKind { llvm_unreachable("Unexpected clause"); }, }, s.v.u); memoryOrder = mlir::omp::ClauseMemoryOrderKindAttr::get( firOpBuilder.getContext(), kind); }, [&](const parser::OmpHintClause &s) { const auto *expr = semantics::GetExpr(s.v); uint64_t hintExprValue = *evaluate::ToInt64(*expr); hint = firOpBuilder.getI64IntegerAttr(hintExprValue); }, [&](const parser::OmpFailClause &) {}, }, clause.u); } } static void processOmpAtomicTODO(mlir::Type elementType, mlir::Location loc) { if (!elementType) return; assert(fir::isa_trivial(fir::unwrapRefType(elementType)) && "is supported type for omp atomic"); } /// Used to generate atomic.read operation which is created in existing /// location set by builder. 
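/// For example (illustrative), under "!$omp atomic read" the statement
/// "v = x" lowers to:
///
///   omp.atomic.read %v = %x : !fir.ref<i32>, !fir.ref<i32>, i32
///
/// for a default INTEGER `x`.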
static void genAtomicCaptureStatement(
    lower::AbstractConverter &converter, mlir::Value fromAddress,
    mlir::Value toAddress,
    const parser::OmpAtomicClauseList *leftHandClauseList,
    const parser::OmpAtomicClauseList *rightHandClauseList,
    mlir::Type elementType, mlir::Location loc) {
  // Generate `atomic.read` operation for atomic assignment statements.
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

  processOmpAtomicTODO(elementType, loc);

  // If no hint clause is specified, the effect is as if
  // hint(omp_sync_hint_none) had been specified.
  mlir::IntegerAttr hint = nullptr;
  mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
  if (leftHandClauseList)
    genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
                                          memoryOrder);
  if (rightHandClauseList)
    genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
                                          memoryOrder);
  firOpBuilder.create<mlir::omp::AtomicReadOp>(
      loc, fromAddress, toAddress, mlir::TypeAttr::get(elementType), hint,
      memoryOrder);
}

/// Used to generate an atomic.write operation, which is created at the
/// insertion point currently set on the builder.
static void genAtomicWriteStatement(
    lower::AbstractConverter &converter, mlir::Value lhsAddr,
    mlir::Value rhsExpr, const parser::OmpAtomicClauseList *leftHandClauseList,
    const parser::OmpAtomicClauseList *rightHandClauseList, mlir::Location loc,
    mlir::Value *evaluatedExprValue = nullptr) {
  // Generate `atomic.write` operation for atomic assignment statements.
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

  mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
  // Create a conversion outside the capture block.
  auto insertionPoint = firOpBuilder.saveInsertionPoint();
  firOpBuilder.setInsertionPointAfter(rhsExpr.getDefiningOp());
  rhsExpr = firOpBuilder.createConvert(loc, varType, rhsExpr);
  firOpBuilder.restoreInsertionPoint(insertionPoint);

  processOmpAtomicTODO(varType, loc);

  // If no hint clause is specified, the effect is as if
  // hint(omp_sync_hint_none) had been specified.
  mlir::IntegerAttr hint = nullptr;
  mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
  if (leftHandClauseList)
    genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
                                          memoryOrder);
  if (rightHandClauseList)
    genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
                                          memoryOrder);
  firOpBuilder.create<mlir::omp::AtomicWriteOp>(loc, lhsAddr, rhsExpr, hint,
                                                memoryOrder);
}

/// Used to generate an atomic.update operation, which is created at the
/// insertion point currently set on the builder.
static void genAtomicUpdateStatement( lower::AbstractConverter &converter, mlir::Value lhsAddr, mlir::Type varType, const parser::Variable &assignmentStmtVariable, const parser::Expr &assignmentStmtExpr, const parser::OmpAtomicClauseList *leftHandClauseList, const parser::OmpAtomicClauseList *rightHandClauseList, mlir::Location loc, mlir::Operation *atomicCaptureOp = nullptr) { // Generate `atomic.update` operation for atomic assignment statements fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::Location currentLocation = converter.getCurrentLocation(); // Create the omp.atomic.update or acc.atomic.update operation // // func.func @_QPsb() { // %0 = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFsbEa"} // %1 = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFsbEb"} // %2 = fir.load %1 : !fir.ref // omp.atomic.update %0 : !fir.ref { // ^bb0(%arg0: i32): // %3 = arith.addi %arg0, %2 : i32 // omp.yield(%3 : i32) // } // return // } auto getArgExpression = [](std::list::const_iterator it) { const auto &arg{std::get((*it).t)}; const auto *parserExpr{ std::get_if>(&arg.u)}; return parserExpr; }; // Lower any non atomic sub-expression before the atomic operation, and // map its lowered value to the semantic representation. lower::ExprToValueMap exprValueOverrides; // Max and min intrinsics can have a list of Args. Hence we need a list // of nonAtomicSubExprs to hoist. Currently, only the load is hoisted. llvm::SmallVector nonAtomicSubExprs; common::visit( common::visitors{ [&](const common::Indirection &funcRef) -> void { const auto &args{std::get>( funcRef.value().v.t)}; std::list::const_iterator beginIt = args.begin(); std::list::const_iterator endIt = args.end(); const auto *exprFirst{getArgExpression(beginIt)}; if (exprFirst && exprFirst->value().source == assignmentStmtVariable.GetSource()) { // Add everything except the first beginIt++; } else { // Add everything except the last endIt--; } std::list::const_iterator it; for (it = beginIt; it != endIt; it++) { const common::Indirection *expr = getArgExpression(it); if (expr) nonAtomicSubExprs.push_back(semantics::GetExpr(*expr)); } }, [&](const auto &op) -> void { using T = std::decay_t; if constexpr (std::is_base_of::value) { const auto &exprLeft{std::get<0>(op.t)}; const auto &exprRight{std::get<1>(op.t)}; if (exprLeft.value().source == assignmentStmtVariable.GetSource()) nonAtomicSubExprs.push_back(semantics::GetExpr(exprRight)); else nonAtomicSubExprs.push_back(semantics::GetExpr(exprLeft)); } }, }, assignmentStmtExpr.u); lower::StatementContext nonAtomicStmtCtx; if (!nonAtomicSubExprs.empty()) { // Generate non atomic part before all the atomic operations. auto insertionPoint = firOpBuilder.saveInsertionPoint(); if (atomicCaptureOp) firOpBuilder.setInsertionPoint(atomicCaptureOp); mlir::Value nonAtomicVal; for (auto *nonAtomicSubExpr : nonAtomicSubExprs) { nonAtomicVal = fir::getBase(converter.genExprValue( currentLocation, *nonAtomicSubExpr, nonAtomicStmtCtx)); exprValueOverrides.try_emplace(nonAtomicSubExpr, nonAtomicVal); } if (atomicCaptureOp) firOpBuilder.restoreInsertionPoint(insertionPoint); } mlir::Operation *atomicUpdateOp = nullptr; // If no hint clause is specified, the effect is as if // hint(omp_sync_hint_none) had been specified. 
mlir::IntegerAttr hint = nullptr; mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr; if (leftHandClauseList) genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint, memoryOrder); if (rightHandClauseList) genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint, memoryOrder); atomicUpdateOp = firOpBuilder.create( currentLocation, lhsAddr, hint, memoryOrder); processOmpAtomicTODO(varType, loc); llvm::SmallVector varTys = {varType}; llvm::SmallVector locs = {currentLocation}; firOpBuilder.createBlock(&atomicUpdateOp->getRegion(0), {}, varTys, locs); mlir::Value val = fir::getBase(atomicUpdateOp->getRegion(0).front().getArgument(0)); exprValueOverrides.try_emplace(semantics::GetExpr(assignmentStmtVariable), val); { // statement context inside the atomic block. converter.overrideExprValues(&exprValueOverrides); lower::StatementContext atomicStmtCtx; mlir::Value rhsExpr = fir::getBase(converter.genExprValue( *semantics::GetExpr(assignmentStmtExpr), atomicStmtCtx)); mlir::Type exprType = fir::unwrapRefType(rhsExpr.getType()); if (fir::isa_complex(exprType) && !fir::isa_complex(varType)) { // Emit an additional `ExtractValueOp` if the expression is of complex // type auto extract = firOpBuilder.create( currentLocation, mlir::cast(exprType).getElementType(), rhsExpr, firOpBuilder.getArrayAttr( firOpBuilder.getIntegerAttr(firOpBuilder.getIndexType(), 0))); mlir::Value convertResult = firOpBuilder.create( currentLocation, varType, extract); firOpBuilder.create(currentLocation, convertResult); } else { mlir::Value convertResult = firOpBuilder.createConvert(currentLocation, varType, rhsExpr); firOpBuilder.create(currentLocation, convertResult); } converter.resetExprOverrides(); } firOpBuilder.setInsertionPointAfter(atomicUpdateOp); } /// Processes an atomic construct with write clause. static void genAtomicWrite(lower::AbstractConverter &converter, const parser::OmpAtomicWrite &atomicWrite, mlir::Location loc) { const parser::OmpAtomicClauseList *rightHandClauseList = nullptr; const parser::OmpAtomicClauseList *leftHandClauseList = nullptr; // Get the address of atomic read operands. rightHandClauseList = &std::get<2>(atomicWrite.t); leftHandClauseList = &std::get<0>(atomicWrite.t); const parser::AssignmentStmt &stmt = std::get>(atomicWrite.t) .statement; const evaluate::Assignment &assign = *stmt.typedAssignment->v; lower::StatementContext stmtCtx; // Get the value and address of atomic write operands. mlir::Value rhsExpr = fir::getBase(converter.genExprValue(assign.rhs, stmtCtx)); mlir::Value lhsAddr = fir::getBase(converter.genExprAddr(assign.lhs, stmtCtx)); genAtomicWriteStatement(converter, lhsAddr, rhsExpr, leftHandClauseList, rightHandClauseList, loc); } /// Processes an atomic construct with read clause. static void genAtomicRead(lower::AbstractConverter &converter, const parser::OmpAtomicRead &atomicRead, mlir::Location loc) { const parser::OmpAtomicClauseList *rightHandClauseList = nullptr; const parser::OmpAtomicClauseList *leftHandClauseList = nullptr; // Get the address of atomic read operands. 
rightHandClauseList = &std::get<2>(atomicRead.t);
  leftHandClauseList = &std::get<0>(atomicRead.t);

  const auto &assignmentStmtExpr = std::get<parser::Expr>(
      std::get<parser::Statement<parser::AssignmentStmt>>(atomicRead.t)
          .statement.t);
  const auto &assignmentStmtVariable = std::get<parser::Variable>(
      std::get<parser::Statement<parser::AssignmentStmt>>(atomicRead.t)
          .statement.t);

  lower::StatementContext stmtCtx;
  const semantics::SomeExpr &fromExpr = *semantics::GetExpr(assignmentStmtExpr);
  mlir::Type elementType = converter.genType(fromExpr);
  mlir::Value fromAddress =
      fir::getBase(converter.genExprAddr(fromExpr, stmtCtx));
  mlir::Value toAddress = fir::getBase(converter.genExprAddr(
      *semantics::GetExpr(assignmentStmtVariable), stmtCtx));

  if (fromAddress.getType() != toAddress.getType()) {
    // Emit an implicit cast. Different yet compatible types on
    // omp.atomic.read constitute valid Fortran. The OMPIRBuilder will
    // emit atomic instructions (on primitive types) and the `__atomic_load`
    // libcall (on complex types) without explicitly converting
    // between such compatible types. The OMPIRBuilder relies on the
    // frontend to resolve such inconsistencies between `omp.atomic.read`
    // operand types. Similar inconsistencies between operand types in
    // `omp.atomic.write` are resolved through implicit casting by use of typed
    // assignment (i.e. `evaluate::Assignment`). However, use of typed
    // assignment in `omp.atomic.read` (of form `v = x`) leads to an unsafe,
    // non-atomic load of `x` into a temporary `alloca`, followed by an atomic
    // read of form `v = alloca`. Hence, a custom implicit cast is needed.

    // An atomic read of form `v = x` would (without implicit casting)
    // lower to `omp.atomic.read %v = %x : !fir.ref<type1>, !fir.ref<type2>,
    // type2`. The implicit casting instead generates the following FIR:
    //
    //   %alloca = fir.alloca type2
    //   omp.atomic.read %alloca = %x : !fir.ref<type2>, !fir.ref<type2>, type2
    //   %load = fir.load %alloca : !fir.ref<type2>
    //   %cvt = fir.convert %load : (type2) -> type1
    //   fir.store %cvt to %v : !fir.ref<type1>

    // This sequence of operations is thread-safe since each thread allocates
    // the `alloca` in its own stack, and performs `%alloca = %x` atomically.
    // Once safely read, each thread performs the implicit cast on the local
    // `alloca`, and writes the final result to `%v`.
    mlir::Type toType = fir::unwrapRefType(toAddress.getType());
    mlir::Type fromType = fir::unwrapRefType(fromAddress.getType());
    fir::FirOpBuilder &builder = converter.getFirOpBuilder();
    auto oldIP = builder.saveInsertionPoint();
    builder.setInsertionPointToStart(builder.getAllocaBlock());
    mlir::Value alloca = builder.create<fir::AllocaOp>(
        loc, fromType); // Thread-scoped `alloca` to atomically read `%x`.
    builder.restoreInsertionPoint(oldIP);
    genAtomicCaptureStatement(converter, fromAddress, alloca,
                              leftHandClauseList, rightHandClauseList,
                              elementType, loc);

    auto load = builder.create<fir::LoadOp>(loc, alloca);
    if (fir::isa_complex(fromType) && !fir::isa_complex(toType)) {
      // Emit an additional `ExtractValueOp` if `fromAddress` is of complex
      // type, but `toAddress` is not.
      auto extract = builder.create<fir::ExtractValueOp>(
          loc, mlir::cast<mlir::ComplexType>(fromType).getElementType(), load,
          builder.getArrayAttr(
              builder.getIntegerAttr(builder.getIndexType(), 0)));
      auto cvt = builder.create<fir::ConvertOp>(loc, toType, extract);
      builder.create<fir::StoreOp>(loc, cvt, toAddress);
    } else if (!fir::isa_complex(fromType) && fir::isa_complex(toType)) {
      // Emit an additional `InsertValueOp` if `toAddress` is of complex
      // type, but `fromAddress` is not.
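      // (Illustrative) the result is built as cmplx(cvt(load), 0.0): the
      // converted scalar becomes the real part and the imaginary part is set
      // to zero before storing to `toAddress`.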
mlir::Value undef = builder.create<fir::UndefOp>(loc, toType);
      mlir::Type complexEleTy =
          mlir::cast<mlir::ComplexType>(toType).getElementType();
      mlir::Value cvt = builder.create<fir::ConvertOp>(loc, complexEleTy, load);
      mlir::Value zero = builder.createRealZeroConstant(loc, complexEleTy);
      mlir::Value idx0 = builder.create<fir::InsertValueOp>(
          loc, toType, undef, cvt,
          builder.getArrayAttr(
              builder.getIntegerAttr(builder.getIndexType(), 0)));
      mlir::Value idx1 = builder.create<fir::InsertValueOp>(
          loc, toType, idx0, zero,
          builder.getArrayAttr(
              builder.getIntegerAttr(builder.getIndexType(), 1)));
      builder.create<fir::StoreOp>(loc, idx1, toAddress);
    } else {
      auto cvt = builder.create<fir::ConvertOp>(loc, toType, load);
      builder.create<fir::StoreOp>(loc, cvt, toAddress);
    }
  } else
    genAtomicCaptureStatement(converter, fromAddress, toAddress,
                              leftHandClauseList, rightHandClauseList,
                              elementType, loc);
}

/// Processes an atomic construct with update clause.
static void genAtomicUpdate(lower::AbstractConverter &converter,
                            const parser::OmpAtomicUpdate &atomicUpdate,
                            mlir::Location loc) {
  const parser::OmpAtomicClauseList *rightHandClauseList = nullptr;
  const parser::OmpAtomicClauseList *leftHandClauseList = nullptr;
  // Get the address of atomic read operands.
  rightHandClauseList = &std::get<2>(atomicUpdate.t);
  leftHandClauseList = &std::get<0>(atomicUpdate.t);

  const auto &assignmentStmtExpr = std::get<parser::Expr>(
      std::get<parser::Statement<parser::AssignmentStmt>>(atomicUpdate.t)
          .statement.t);
  const auto &assignmentStmtVariable = std::get<parser::Variable>(
      std::get<parser::Statement<parser::AssignmentStmt>>(atomicUpdate.t)
          .statement.t);

  lower::StatementContext stmtCtx;
  mlir::Value lhsAddr = fir::getBase(converter.genExprAddr(
      *semantics::GetExpr(assignmentStmtVariable), stmtCtx));
  mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
  genAtomicUpdateStatement(converter, lhsAddr, varType, assignmentStmtVariable,
                           assignmentStmtExpr, leftHandClauseList,
                           rightHandClauseList, loc);
}

/// Processes an atomic construct with no clause, which implies the update
/// clause.
static void genOmpAtomic(lower::AbstractConverter &converter,
                         const parser::OmpAtomic &atomicConstruct,
                         mlir::Location loc) {
  const parser::OmpAtomicClauseList &atomicClauseList =
      std::get<parser::OmpAtomicClauseList>(atomicConstruct.t);
  const auto &assignmentStmtExpr = std::get<parser::Expr>(
      std::get<parser::Statement<parser::AssignmentStmt>>(atomicConstruct.t)
          .statement.t);
  const auto &assignmentStmtVariable = std::get<parser::Variable>(
      std::get<parser::Statement<parser::AssignmentStmt>>(atomicConstruct.t)
          .statement.t);
  lower::StatementContext stmtCtx;
  mlir::Value lhsAddr = fir::getBase(converter.genExprAddr(
      *semantics::GetExpr(assignmentStmtVariable), stmtCtx));
  mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
  // If atomic-clause is not present on the construct, the behaviour is as if
  // the update clause is specified (for both OpenMP and OpenACC).
  genAtomicUpdateStatement(converter, lhsAddr, varType, assignmentStmtVariable,
                           assignmentStmtExpr, &atomicClauseList, nullptr, loc);
}

/// Processes an atomic construct with capture clause.
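/// For example (illustrative), the capture-then-update form
///
///   !$omp atomic capture
///     v = x
///     x = x + 1
///   !$omp end atomic
///
/// produces an omp.atomic.capture region containing an atomic read of `x`
/// into `v` followed by an atomic update of `x`.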
static void genAtomicCapture(lower::AbstractConverter &converter, const parser::OmpAtomicCapture &atomicCapture, mlir::Location loc) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const parser::AssignmentStmt &stmt1 = std::get(atomicCapture.t).v.statement; const evaluate::Assignment &assign1 = *stmt1.typedAssignment->v; const auto &stmt1Var{std::get(stmt1.t)}; const auto &stmt1Expr{std::get(stmt1.t)}; const parser::AssignmentStmt &stmt2 = std::get(atomicCapture.t).v.statement; const evaluate::Assignment &assign2 = *stmt2.typedAssignment->v; const auto &stmt2Var{std::get(stmt2.t)}; const auto &stmt2Expr{std::get(stmt2.t)}; // Pre-evaluate expressions to be used in the various operations inside // `atomic.capture` since it is not desirable to have anything other than // a `atomic.read`, `atomic.write`, or `atomic.update` operation // inside `atomic.capture` lower::StatementContext stmtCtx; // LHS evaluations are common to all combinations of `atomic.capture` mlir::Value stmt1LHSArg = fir::getBase(converter.genExprAddr(assign1.lhs, stmtCtx)); mlir::Value stmt2LHSArg = fir::getBase(converter.genExprAddr(assign2.lhs, stmtCtx)); // Type information used in generation of `atomic.update` operation mlir::Type stmt1VarType = fir::getBase(converter.genExprValue(assign1.lhs, stmtCtx)).getType(); mlir::Type stmt2VarType = fir::getBase(converter.genExprValue(assign2.lhs, stmtCtx)).getType(); // Check if implicit type is needed if (stmt1VarType != stmt2VarType) TODO(loc, "atomic capture requiring implicit type casts"); mlir::Operation *atomicCaptureOp = nullptr; mlir::IntegerAttr hint = nullptr; mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr; const parser::OmpAtomicClauseList &rightHandClauseList = std::get<2>(atomicCapture.t); const parser::OmpAtomicClauseList &leftHandClauseList = std::get<0>(atomicCapture.t); genOmpAtomicHintAndMemoryOrderClauses(converter, leftHandClauseList, hint, memoryOrder); genOmpAtomicHintAndMemoryOrderClauses(converter, rightHandClauseList, hint, memoryOrder); atomicCaptureOp = firOpBuilder.create(loc, hint, memoryOrder); firOpBuilder.createBlock(&(atomicCaptureOp->getRegion(0))); mlir::Block &block = atomicCaptureOp->getRegion(0).back(); firOpBuilder.setInsertionPointToStart(&block); if (semantics::checkForSingleVariableOnRHS(stmt1)) { if (semantics::checkForSymbolMatch(stmt2)) { // Atomic capture construct is of the form [capture-stmt, update-stmt] const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt1Expr); mlir::Type elementType = converter.genType(fromExpr); genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg, /*leftHandClauseList=*/nullptr, /*rightHandClauseList=*/nullptr, elementType, loc); genAtomicUpdateStatement( converter, stmt2LHSArg, stmt2VarType, stmt2Var, stmt2Expr, /*leftHandClauseList=*/nullptr, /*rightHandClauseList=*/nullptr, loc, atomicCaptureOp); } else { // Atomic capture construct is of the form [capture-stmt, write-stmt] firOpBuilder.setInsertionPoint(atomicCaptureOp); mlir::Value stmt2RHSArg = fir::getBase(converter.genExprValue(assign2.rhs, stmtCtx)); firOpBuilder.setInsertionPointToStart(&block); const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt1Expr); mlir::Type elementType = converter.genType(fromExpr); genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg, /*leftHandClauseList=*/nullptr, /*rightHandClauseList=*/nullptr, elementType, loc); genAtomicWriteStatement(converter, stmt2LHSArg, stmt2RHSArg, /*leftHandClauseList=*/nullptr, /*rightHandClauseList=*/nullptr, loc); } } else { 
// Atomic capture construct is of the form [update-stmt, capture-stmt] const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt2Expr); mlir::Type elementType = converter.genType(fromExpr); genAtomicUpdateStatement( converter, stmt1LHSArg, stmt1VarType, stmt1Var, stmt1Expr, /*leftHandClauseList=*/nullptr, /*rightHandClauseList=*/nullptr, loc, atomicCaptureOp); genAtomicCaptureStatement(converter, stmt1LHSArg, stmt2LHSArg, /*leftHandClauseList=*/nullptr, /*rightHandClauseList=*/nullptr, elementType, loc); } firOpBuilder.setInsertionPointToEnd(&block); firOpBuilder.create(loc); firOpBuilder.setInsertionPointToStart(&block); } //===----------------------------------------------------------------------===// // Code generation functions for the standalone version of constructs that can // also be a leaf of a composite construct //===----------------------------------------------------------------------===// static mlir::omp::DistributeOp genStandaloneDistribute( lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::DistributeOperands distributeClauseOps; genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc, distributeClauseOps); DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, enableDelayedPrivatization, symTable); dsp.processStep1(&distributeClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, loopNestClauseOps, iv); EntryBlockArgs distributeArgs; distributeArgs.priv.syms = dsp.getDelayedPrivSymbols(); distributeArgs.priv.vars = distributeClauseOps.privateVars; auto distributeOp = genWrapperOp( converter, loc, distributeClauseOps, distributeArgs); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, loopNestClauseOps, iv, {{distributeOp, distributeArgs}}, llvm::omp::Directive::OMPD_distribute, dsp); return distributeOp; } static mlir::omp::WsloopOp genStandaloneDo( lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::WsloopOperands wsloopClauseOps; llvm::SmallVector wsloopReductionSyms; genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, wsloopClauseOps, wsloopReductionSyms); DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, enableDelayedPrivatization, symTable); dsp.processStep1(&wsloopClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, loopNestClauseOps, iv); EntryBlockArgs wsloopArgs; wsloopArgs.priv.syms = dsp.getDelayedPrivSymbols(); wsloopArgs.priv.vars = wsloopClauseOps.privateVars; wsloopArgs.reduction.syms = wsloopReductionSyms; wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars; auto wsloopOp = genWrapperOp( converter, loc, wsloopClauseOps, wsloopArgs); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, loopNestClauseOps, iv, {{wsloopOp, wsloopArgs}}, llvm::omp::Directive::OMPD_do, dsp); return wsloopOp; } static mlir::omp::ParallelOp genStandaloneParallel( lower::AbstractConverter &converter, 
static mlir::omp::ParallelOp genStandaloneParallel(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  mlir::omp::ParallelOperands parallelClauseOps;
  llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
  genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
                     parallelClauseOps, parallelReductionSyms);

  std::optional<DataSharingProcessor> dsp;
  if (enableDelayedPrivatization) {
    dsp.emplace(converter, semaCtx, item->clauses, eval,
                lower::omp::isLastItemInQueue(item, queue),
                /*useDelayedPrivatization=*/true, symTable);
    dsp->processStep1(&parallelClauseOps);
  }

  EntryBlockArgs parallelArgs;
  if (dsp)
    parallelArgs.priv.syms = dsp->getDelayedPrivSymbols();
  parallelArgs.priv.vars = parallelClauseOps.privateVars;
  parallelArgs.reduction.syms = parallelReductionSyms;
  parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
  return genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item,
                       parallelClauseOps, parallelArgs,
                       enableDelayedPrivatization ? &dsp.value() : nullptr);
}

static mlir::omp::SimdOp genStandaloneSimd(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    mlir::Location loc, const ConstructQueue &queue,
    ConstructQueue::const_iterator item) {
  mlir::omp::SimdOperands simdClauseOps;
  llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
  genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps,
                 simdReductionSyms);

  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           enableDelayedPrivatization, symTable);
  dsp.processStep1(&simdClauseOps);

  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> iv;
  genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
                     loopNestClauseOps, iv);

  EntryBlockArgs simdArgs;
  simdArgs.priv.syms = dsp.getDelayedPrivSymbols();
  simdArgs.priv.vars = simdClauseOps.privateVars;
  simdArgs.reduction.syms = simdReductionSyms;
  simdArgs.reduction.vars = simdClauseOps.reductionVars;
  auto simdOp =
      genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                loopNestClauseOps, iv, {{simdOp, simdArgs}},
                llvm::omp::Directive::OMPD_simd, dsp);
  return simdOp;
}

static mlir::omp::TaskloopOp genStandaloneTaskloop(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  mlir::omp::TaskloopOperands taskloopClauseOps;
  genTaskloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
                     taskloopClauseOps);

  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           enableDelayedPrivatization, symTable);
  dsp.processStep1(&taskloopClauseOps);

  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> iv;
  genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
                     loopNestClauseOps, iv);

  EntryBlockArgs taskloopArgs;
  taskloopArgs.priv.syms = dsp.getDelayedPrivSymbols();
  taskloopArgs.priv.vars = taskloopClauseOps.privateVars;
  auto taskLoopOp = genWrapperOp<mlir::omp::TaskloopOp>(
      converter, loc, taskloopClauseOps, taskloopArgs);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                loopNestClauseOps, iv, {{taskLoopOp, taskloopArgs}},
                llvm::omp::Directive::OMPD_taskloop, dsp);
  return taskLoopOp;
}

//===----------------------------------------------------------------------===//
// Code generation functions for composite constructs
//===----------------------------------------------------------------------===//
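// Composite constructs are lowered as a nest of wrapper operations around a
// single loop. As a sketch, a construct such as (hypothetical input):
//
//   !$omp distribute parallel do
//
// produces, roughly:
//
//   omp.parallel {
//     omp.distribute {
//       omp.wsloop {
//         omp.loop_nest ... { ... }
//       }
//     }
//   }
//
// with the loop wrappers marked composite via setComposite(true).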
static mlir::omp::DistributeOp genCompositeDistributeParallelDo(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs");
  ConstructQueue::const_iterator distributeItem = item;
  ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
  ConstructQueue::const_iterator doItem = std::next(parallelItem);

  // Create parent omp.parallel first.
  mlir::omp::ParallelOperands parallelClauseOps;
  llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
  genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
                     parallelClauseOps, parallelReductionSyms);

  DataSharingProcessor dsp(converter, semaCtx, doItem->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           /*useDelayedPrivatization=*/true, symTable);
  dsp.processStep1(&parallelClauseOps);

  EntryBlockArgs parallelArgs;
  parallelArgs.priv.syms = dsp.getDelayedPrivSymbols();
  parallelArgs.priv.vars = parallelClauseOps.privateVars;
  parallelArgs.reduction.syms = parallelReductionSyms;
  parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
  genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
                parallelClauseOps, parallelArgs, &dsp,
                /*isComposite=*/true);

  // Clause processing.
  mlir::omp::DistributeOperands distributeClauseOps;
  genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
                       loc, distributeClauseOps);

  mlir::omp::WsloopOperands wsloopClauseOps;
  llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
  genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
                   wsloopClauseOps, wsloopReductionSyms);

  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> iv;
  genLoopNestClauses(converter, semaCtx, eval, doItem->clauses, loc,
                     loopNestClauseOps, iv);

  // Operation creation.
  EntryBlockArgs distributeArgs;
  // TODO: Add private syms and vars.
  auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
      converter, loc, distributeClauseOps, distributeArgs);
  distributeOp.setComposite(/*val=*/true);

  EntryBlockArgs wsloopArgs;
  // TODO: Add private syms and vars.
  wsloopArgs.reduction.syms = wsloopReductionSyms;
  wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
  auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
      converter, loc, wsloopClauseOps, wsloopArgs);
  wsloopOp.setComposite(/*val=*/true);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, doItem,
                loopNestClauseOps, iv,
                {{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}},
                llvm::omp::Directive::OMPD_distribute_parallel_do, dsp);
  return distributeOp;
}

static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs");
  ConstructQueue::const_iterator distributeItem = item;
  ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
  ConstructQueue::const_iterator doItem = std::next(parallelItem);
  ConstructQueue::const_iterator simdItem = std::next(doItem);

  // Create parent omp.parallel first.
  mlir::omp::ParallelOperands parallelClauseOps;
  llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
  genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
                     parallelClauseOps, parallelReductionSyms);

  DataSharingProcessor parallelItemDSP(
      converter, semaCtx, parallelItem->clauses, eval,
      /*shouldCollectPreDeterminedSymbols=*/false,
      /*useDelayedPrivatization=*/true, symTable);
  parallelItemDSP.processStep1(&parallelClauseOps);

  EntryBlockArgs parallelArgs;
  parallelArgs.priv.syms = parallelItemDSP.getDelayedPrivSymbols();
  parallelArgs.priv.vars = parallelClauseOps.privateVars;
  parallelArgs.reduction.syms = parallelReductionSyms;
  parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
  genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
                parallelClauseOps, parallelArgs, &parallelItemDSP,
                /*isComposite=*/true);

  // Clause processing.
  mlir::omp::DistributeOperands distributeClauseOps;
  genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
                       loc, distributeClauseOps);

  mlir::omp::WsloopOperands wsloopClauseOps;
  llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
  genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
                   wsloopClauseOps, wsloopReductionSyms);

  mlir::omp::SimdOperands simdClauseOps;
  llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
  genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
                 simdReductionSyms);

  DataSharingProcessor simdItemDSP(converter, semaCtx, simdItem->clauses, eval,
                                   /*shouldCollectPreDeterminedSymbols=*/true,
                                   /*useDelayedPrivatization=*/true, symTable);
  simdItemDSP.processStep1(&simdClauseOps);

  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> iv;
  genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
                     loopNestClauseOps, iv);

  // Operation creation.
  EntryBlockArgs distributeArgs;
  // TODO: Add private syms and vars.
  auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
      converter, loc, distributeClauseOps, distributeArgs);
  distributeOp.setComposite(/*val=*/true);

  EntryBlockArgs wsloopArgs;
  // TODO: Add private syms and vars.
  wsloopArgs.reduction.syms = wsloopReductionSyms;
  wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
  auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
      converter, loc, wsloopClauseOps, wsloopArgs);
  wsloopOp.setComposite(/*val=*/true);

  EntryBlockArgs simdArgs;
  simdArgs.priv.syms = simdItemDSP.getDelayedPrivSymbols();
  simdArgs.priv.vars = simdClauseOps.privateVars;
  simdArgs.reduction.syms = simdReductionSyms;
  simdArgs.reduction.vars = simdClauseOps.reductionVars;
  auto simdOp =
      genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
  simdOp.setComposite(/*val=*/true);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
                loopNestClauseOps, iv,
                {{distributeOp, distributeArgs},
                 {wsloopOp, wsloopArgs},
                 {simdOp, simdArgs}},
                llvm::omp::Directive::OMPD_distribute_parallel_do_simd,
                simdItemDSP);
  return distributeOp;
}

static mlir::omp::DistributeOp genCompositeDistributeSimd(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
  ConstructQueue::const_iterator distributeItem = item;
  ConstructQueue::const_iterator simdItem = std::next(distributeItem);

  // Clause processing.
  mlir::omp::DistributeOperands distributeClauseOps;
  genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
                       loc, distributeClauseOps);

  mlir::omp::SimdOperands simdClauseOps;
  llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
  genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
                 simdReductionSyms);

  // TODO: Support delayed privatization.
  DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           /*useDelayedPrivatization=*/false, symTable);
  dsp.processStep1();

  // Pass the innermost leaf construct's clauses because that's where COLLAPSE
  // is placed by construct decomposition.
  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> iv;
  genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
                     loopNestClauseOps, iv);

  // Operation creation.
  EntryBlockArgs distributeArgs;
  // TODO: Add private syms and vars.
  auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
      converter, loc, distributeClauseOps, distributeArgs);
  distributeOp.setComposite(/*val=*/true);

  EntryBlockArgs simdArgs;
  // TODO: Add private syms and vars.
  simdArgs.reduction.syms = simdReductionSyms;
  simdArgs.reduction.vars = simdClauseOps.reductionVars;
  auto simdOp =
      genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
  simdOp.setComposite(/*val=*/true);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
                loopNestClauseOps, iv,
                {{distributeOp, distributeArgs}, {simdOp, simdArgs}},
                llvm::omp::Directive::OMPD_distribute_simd, dsp);
  return distributeOp;
}

static mlir::omp::WsloopOp genCompositeDoSimd(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
  ConstructQueue::const_iterator doItem = item;
  ConstructQueue::const_iterator simdItem = std::next(doItem);

  // Clause processing.
  mlir::omp::WsloopOperands wsloopClauseOps;
  llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
  genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
                   wsloopClauseOps, wsloopReductionSyms);

  mlir::omp::SimdOperands simdClauseOps;
  llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
  genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
                 simdReductionSyms);

  // TODO: Support delayed privatization.
  DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
                           /*shouldCollectPreDeterminedSymbols=*/true,
                           /*useDelayedPrivatization=*/false, symTable);
  dsp.processStep1();

  // Pass the innermost leaf construct's clauses because that's where COLLAPSE
  // is placed by construct decomposition.
  mlir::omp::LoopNestOperands loopNestClauseOps;
  llvm::SmallVector<const semantics::Symbol *> iv;
  genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
                     loopNestClauseOps, iv);

  // Operation creation.
  EntryBlockArgs wsloopArgs;
  // TODO: Add private syms and vars.
  wsloopArgs.reduction.syms = wsloopReductionSyms;
  wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
  auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
      converter, loc, wsloopClauseOps, wsloopArgs);
  wsloopOp.setComposite(/*val=*/true);

  EntryBlockArgs simdArgs;
  // TODO: Add private syms and vars.
  simdArgs.reduction.syms = simdReductionSyms;
  simdArgs.reduction.vars = simdClauseOps.reductionVars;
  auto simdOp =
      genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
  simdOp.setComposite(/*val=*/true);

  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
                loopNestClauseOps, iv,
                {{wsloopOp, wsloopArgs}, {simdOp, simdArgs}},
                llvm::omp::Directive::OMPD_do_simd, dsp);
  return wsloopOp;
}

static mlir::omp::TaskloopOp genCompositeTaskloopSimd(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
  assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
  TODO(loc, "Composite TASKLOOP SIMD");
  return nullptr;
}

//===----------------------------------------------------------------------===//
// Dispatch
//===----------------------------------------------------------------------===//

static bool genOMPCompositeDispatch(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval, mlir::Location loc,
    const ConstructQueue &queue, ConstructQueue::const_iterator item,
    mlir::Operation *&newOp) {
  using llvm::omp::Directive;
  using lower::omp::matchLeafSequence;

  // TODO: Privatization for composite constructs is currently only done based
  // on the clauses for their last leaf construct, which may not always be
  // correct. Consider per-leaf privatization of composite constructs once
  // delayed privatization is supported by all participating ops.
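  // As an example of how this matching works, a combined construct such as
  // (hypothetical input) `!$omp target teams distribute parallel do` is split
  // into the leaf queue [target, teams, distribute, parallel, do] when the
  // construct queue is built; once dispatch reaches the `distribute` item,
  // the remaining [distribute, parallel, do] tail matches
  // OMPD_distribute_parallel_do below and is lowered as one composite unit.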
  if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do))
    newOp = genCompositeDistributeParallelDo(converter, symTable, stmtCtx,
                                             semaCtx, eval, loc, queue, item);
  else if (matchLeafSequence(item, queue,
                             Directive::OMPD_distribute_parallel_do_simd))
    newOp = genCompositeDistributeParallelDoSimd(
        converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
  else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd))
    newOp = genCompositeDistributeSimd(converter, symTable, stmtCtx, semaCtx,
                                       eval, loc, queue, item);
  else if (matchLeafSequence(item, queue, Directive::OMPD_do_simd))
    newOp = genCompositeDoSimd(converter, symTable, stmtCtx, semaCtx, eval,
                               loc, queue, item);
  else if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd))
    newOp = genCompositeTaskloopSimd(converter, symTable, stmtCtx, semaCtx,
                                     eval, loc, queue, item);
  else
    return false;

  return true;
}

static void genOMPDispatch(lower::AbstractConverter &converter,
                           lower::SymMap &symTable,
                           semantics::SemanticsContext &semaCtx,
                           lower::pft::Evaluation &eval, mlir::Location loc,
                           const ConstructQueue &queue,
                           ConstructQueue::const_iterator item) {
  assert(item != queue.end());

  lower::StatementContext stmtCtx;
  mlir::Operation *newOp = nullptr;

  // Generate cleanup code for the stmtCtx after newOp.
  auto finalizeStmtCtx = [&]() {
    if (newOp) {
      fir::FirOpBuilder &builder = converter.getFirOpBuilder();
      fir::FirOpBuilder::InsertionGuard guard(builder);
      builder.setInsertionPointAfter(newOp);
      stmtCtx.finalizeAndPop();
    }
  };

  bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) ==
                  llvm::omp::Association::Loop;
  if (loopLeaf) {
    symTable.pushScope();
    if (genOMPCompositeDispatch(converter, symTable, stmtCtx, semaCtx, eval,
                                loc, queue, item, newOp)) {
      symTable.popScope();
      finalizeStmtCtx();
      return;
    }
  }

  switch (llvm::omp::Directive dir = item->id) {
  case llvm::omp::Directive::OMPD_barrier:
    newOp = genBarrierOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_distribute:
    newOp = genStandaloneDistribute(converter, symTable, stmtCtx, semaCtx,
                                    eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_do:
    newOp = genStandaloneDo(converter, symTable, stmtCtx, semaCtx, eval, loc,
                            queue, item);
    break;
  case llvm::omp::Directive::OMPD_loop:
    newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_masked:
    newOp = genMaskedOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
                        queue, item);
    break;
  case llvm::omp::Directive::OMPD_master:
    newOp = genMasterOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_ordered:
    // Block-associated "ordered" construct.
    newOp = genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, queue,
                               item);
    break;
  case llvm::omp::Directive::OMPD_parallel:
    newOp = genStandaloneParallel(converter, symTable, stmtCtx, semaCtx, eval,
                                  loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_scan:
    newOp = genScanOp(converter, symTable, semaCtx, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_section:
    llvm_unreachable("genOMPDispatch: OMPD_section");
    // Lowered in the enclosing genSectionsOp.
    break;
  case llvm::omp::Directive::OMPD_sections:
    // Called directly from genOMP([...], OpenMPSectionsConstruct) because it
    // has a different prototype.
    // This code path is still taken when iterating through the construct
    // queue in genBodyOfOp.
    break;
  case llvm::omp::Directive::OMPD_simd:
    newOp =
        genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_scope:
    newOp = genScopeOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_single:
    newOp = genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_target:
    newOp = genTargetOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
                        queue, item);
    break;
  case llvm::omp::Directive::OMPD_target_data:
    newOp = genTargetDataOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
                            queue, item);
    break;
  case llvm::omp::Directive::OMPD_target_enter_data:
    newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
        converter, symTable, stmtCtx, semaCtx, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_target_exit_data:
    newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
        converter, symTable, stmtCtx, semaCtx, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_target_update:
    newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
        converter, symTable, stmtCtx, semaCtx, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_task:
    newOp = genTaskOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
                      item);
    break;
  case llvm::omp::Directive::OMPD_taskgroup:
    newOp =
        genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_taskloop:
    newOp = genStandaloneTaskloop(converter, symTable, stmtCtx, semaCtx, eval,
                                  loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_taskwait:
    newOp = genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_taskyield:
    newOp =
        genTaskyieldOp(converter, symTable, semaCtx, eval, loc, queue, item);
    break;
  case llvm::omp::Directive::OMPD_teams:
    newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
                       item);
    break;
  case llvm::omp::Directive::OMPD_tile:
  case llvm::omp::Directive::OMPD_unroll:
    TODO(loc, "Unhandled loop directive (" +
                  llvm::omp::getOpenMPDirectiveName(dir) + ")");
  // case llvm::omp::Directive::OMPD_workdistribute:
  case llvm::omp::Directive::OMPD_workshare:
    newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
                           queue, item);
    break;
  default:
    // Combined and composite constructs should have been split into a
    // sequence of leaf constructs when building the construct queue.
    assert(!llvm::omp::isLeafConstruct(dir) &&
           "Unexpected compound construct.");
    break;
  }

  finalizeStmtCtx();
  if (loopLeaf)
    symTable.popScope();
}

//===----------------------------------------------------------------------===//
// OpenMPDeclarativeConstruct visitors
//===----------------------------------------------------------------------===//
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPUtilityConstruct &);

static void genOMP(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    const parser::OpenMPDeclarativeAllocate &declarativeAllocate) {
  TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate");
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPDeclarativeAssumes &assumesConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMP ASSUMES declaration");
}

static void genOMP(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    const parser::OmpDeclareVariantDirective &declareVariantDirective) {
  TODO(converter.getCurrentLocation(), "OmpDeclareVariantDirective");
}

static void genOMP(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    const parser::OpenMPDeclareReductionConstruct &declareReductionConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct");
}

static void genOMP(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    const parser::OpenMPDeclareSimdConstruct &declareSimdConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct");
}

static void genOMP(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    const parser::OpenMPDeclareMapperConstruct &declareMapperConstruct) {
  mlir::Location loc = converter.genLocation(declareMapperConstruct.source);
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  lower::StatementContext stmtCtx;
  const auto &spec =
      std::get<parser::OmpDeclareMapperSpecifier>(declareMapperConstruct.t);
  const auto &mapperName{std::get<std::optional<parser::Name>>(spec.t)};
  const auto &varType{std::get<parser::TypeSpec>(spec.t)};
  const auto &varName{std::get<parser::Name>(spec.t)};
  assert(varType.declTypeSpec->category() ==
             semantics::DeclTypeSpec::Category::TypeDerived &&
         "Expected derived type");

  std::string mapperNameStr;
  if (mapperName.has_value()) {
    mapperNameStr = mapperName->ToString();
    mapperNameStr =
        converter.mangleName(mapperNameStr, mapperName->symbol->owner());
  } else {
    mapperNameStr =
        varType.declTypeSpec->derivedTypeSpec().name().ToString() + ".default";
    mapperNameStr = converter.mangleName(
        mapperNameStr, *varType.declTypeSpec->derivedTypeSpec().GetScope());
  }

  // Save the current insertion point before moving to the module scope to
  // create the DeclareMapperOp.
  mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
  firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody());

  auto mlirType = converter.genType(varType.declTypeSpec->derivedTypeSpec());
  auto declMapperOp = firOpBuilder.create<mlir::omp::DeclareMapperOp>(
      loc, mapperNameStr, mlirType);
  auto &region = declMapperOp.getRegion();
  firOpBuilder.createBlock(&region);
  auto varVal = region.addArgument(firOpBuilder.getRefType(mlirType), loc);
  converter.bindSymbol(*varName.symbol, varVal);

  // Populate the declareMapper region with the map information.
  mlir::omp::DeclareMapperInfoOperands clauseOps;
  const auto *clauseList{
      parser::Unwrap<parser::OmpClauseList>(declareMapperConstruct.t)};
  List<Clause> clauses = makeClauses(*clauseList, semaCtx);
  ClauseProcessor cp(converter, semaCtx, clauses);
  cp.processMap(loc, stmtCtx, clauseOps);
  firOpBuilder.create<mlir::omp::DeclareMapperInfoOp>(loc, clauseOps.mapVars);
}
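// To illustrate, a declaration along the lines of (hypothetical input):
//
//   !$omp declare mapper (mymapper : mytype :: v) map(v, v%data(1:v%n))
//
// would produce an `omp.declare_mapper` op at module scope whose region binds
// `v` as its block argument and terminates in an `omp.declare_mapper.info`
// holding the lowered map clauses.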
static void genOMP(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
  mlir::omp::DeclareTargetOperands clauseOps;
  llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
  mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
  getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
                       clauseOps, symbolAndClause);

  for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
    mlir::Operation *op = mod.lookupSymbol(converter.mangleName(
        std::get<const semantics::Symbol &>(symClause)));

    // Some symbols are deferred until later in the module, these are handled
    // upon finalization of the module for OpenMP inside of Bridge, so we
    // simply skip for now.
    if (!op)
      continue;

    markDeclareTarget(
        op, converter,
        std::get<mlir::omp::DeclareTargetCaptureClause>(symClause),
        clauseOps.deviceType);
  }
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPRequiresConstruct &requiresConstruct) {
  // Requires directives are gathered and processed in semantics and then
  // combined in the lowering bridge before triggering codegen just once.
  // Hence, there is no need to lower each individual occurrence here.
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPThreadprivate &threadprivate) {
  // The directive is lowered when instantiating the variable to support the
  // case of a threadprivate variable declared in a module.
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OmpMetadirectiveDirective &meta) {
  TODO(converter.getCurrentLocation(), "METADIRECTIVE");
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPDeclarativeConstruct &ompDeclConstruct) {
  Fortran::common::visit(
      [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
      ompDeclConstruct.u);
}

//===----------------------------------------------------------------------===//
// OpenMPStandaloneConstruct visitors
//===----------------------------------------------------------------------===//

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPSimpleStandaloneConstruct &construct) {
  const auto &directive = std::get<parser::OmpDirectiveName>(construct.v.t);
  List<Clause> clauses = makeClauses(construct.v.Clauses(), semaCtx);
  mlir::Location currentLocation = converter.genLocation(directive.source);

  ConstructQueue queue{
      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
                          eval, directive.source, directive.v, clauses)};
  if (directive.v == llvm::omp::Directive::OMPD_ordered) {
    // Standalone "ordered" directive.
    genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, queue,
                 queue.begin());
  } else {
    // Dispatch handles the "block-associated" variant of "ordered".
    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                   queue.begin());
  }
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPFlushConstruct &construct) {
  const auto &argumentList = construct.v.Arguments();
  const auto &clauseList = construct.v.Clauses();
  ObjectList objects = makeObjects(argumentList, semaCtx);
  List<Clause> clauses = makeList(
      clauseList.v, [&](auto &&s) { return makeClause(s, semaCtx); });
  mlir::Location currentLocation = converter.genLocation(construct.source);

  ConstructQueue queue{buildConstructQueue(
      converter.getFirOpBuilder().getModule(), semaCtx, eval, construct.source,
      llvm::omp::Directive::OMPD_flush, clauses)};
  genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
             queue, queue.begin());
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPCancelConstruct &cancelConstruct) {
  List<Clause> clauses = makeList(cancelConstruct.v.Clauses().v, [&](auto &&s) {
    return makeClause(s, semaCtx);
  });
  mlir::Location loc = converter.genLocation(cancelConstruct.source);

  ConstructQueue queue{buildConstructQueue(
      converter.getFirOpBuilder().getModule(), semaCtx, eval,
      cancelConstruct.source, llvm::omp::Directive::OMPD_cancel, clauses)};
  genCancelOp(converter, semaCtx, eval, loc, queue, queue.begin());
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPCancellationPointConstruct
                       &cancellationPointConstruct) {
  List<Clause> clauses =
      makeList(cancellationPointConstruct.v.Clauses().v,
               [&](auto &&s) { return makeClause(s, semaCtx); });
  mlir::Location loc =
      converter.genLocation(cancellationPointConstruct.source);

  ConstructQueue queue{
      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
                          eval, cancellationPointConstruct.source,
                          llvm::omp::Directive::OMPD_cancel, clauses)};
  genCancellationPointOp(converter, semaCtx, eval, loc, queue, queue.begin());
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPDepobjConstruct &construct) {
  // These values will be ignored until the construct itself is implemented,
  // but run them anyway for the sake of testing (via a Todo test).
  ObjectList objects = makeObjects(construct.v.Arguments(), semaCtx);
  assert(objects.size() == 1);
  List<Clause> clauses = makeClauses(construct.v.Clauses(), semaCtx);
  assert(clauses.size() == 1);
  (void)objects;
  (void)clauses;

  TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct");
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPInteropConstruct &interopConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMPInteropConstruct");
}

static void genOMP(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    const parser::OpenMPStandaloneConstruct &standaloneConstruct) {
  Fortran::common::visit(
      [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
      standaloneConstruct.u);
}

//===----------------------------------------------------------------------===//
// OpenMPConstruct visitors
//===----------------------------------------------------------------------===//

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPAllocatorsConstruct &allocsConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct");
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPAtomicConstruct &atomicConstruct) {
  Fortran::common::visit(
      common::visitors{
          [&](const parser::OmpAtomicRead &atomicRead) {
            mlir::Location loc = converter.genLocation(atomicRead.source);
            genAtomicRead(converter, atomicRead, loc);
          },
          [&](const parser::OmpAtomicWrite &atomicWrite) {
            mlir::Location loc = converter.genLocation(atomicWrite.source);
            genAtomicWrite(converter, atomicWrite, loc);
          },
          [&](const parser::OmpAtomic &atomicConstruct) {
            mlir::Location loc = converter.genLocation(atomicConstruct.source);
            genOmpAtomic(converter, atomicConstruct, loc);
          },
          [&](const parser::OmpAtomicUpdate &atomicUpdate) {
            mlir::Location loc = converter.genLocation(atomicUpdate.source);
            genAtomicUpdate(converter, atomicUpdate, loc);
          },
          [&](const parser::OmpAtomicCapture &atomicCapture) {
            mlir::Location loc = converter.genLocation(atomicCapture.source);
            genAtomicCapture(converter, atomicCapture, loc);
          },
          [&](const parser::OmpAtomicCompare &atomicCompare) {
            mlir::Location loc = converter.genLocation(atomicCompare.source);
            TODO(loc, "OpenMP atomic compare");
          },
      },
      atomicConstruct.u);
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPBlockConstruct &blockConstruct) {
  const auto &beginBlockDirective =
      std::get<parser::OmpBeginBlockDirective>(blockConstruct.t);
  const auto &endBlockDirective =
      std::get<parser::OmpEndBlockDirective>(blockConstruct.t);
  mlir::Location currentLocation =
      converter.genLocation(beginBlockDirective.source);
  const auto origDirective =
      std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
  List<Clause> clauses = makeClauses(
      std::get<parser::OmpClauseList>(beginBlockDirective.t), semaCtx);
  clauses.append(makeClauses(
      std::get<parser::OmpClauseList>(endBlockDirective.t), semaCtx));

  assert(llvm::omp::blockConstructSet.test(origDirective) &&
         "Expected block construct");
  (void)origDirective;

  for (const Clause &clause : clauses) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (!std::holds_alternative<clause::Affinity>(clause.u) &&
        !std::holds_alternative<clause::Allocate>(clause.u) &&
        !std::holds_alternative<clause::Copyin>(clause.u) &&
        !std::holds_alternative<clause::Copyprivate>(clause.u) &&
        !std::holds_alternative<clause::Default>(clause.u) &&
        !std::holds_alternative<clause::Depend>(clause.u) &&
        !std::holds_alternative<clause::Filter>(clause.u) &&
        !std::holds_alternative<clause::Final>(clause.u) &&
        !std::holds_alternative<clause::Firstprivate>(clause.u) &&
        !std::holds_alternative<clause::HasDeviceAddr>(clause.u) &&
        !std::holds_alternative<clause::If>(clause.u) &&
        !std::holds_alternative<clause::IsDevicePtr>(clause.u) &&
        !std::holds_alternative<clause::Map>(clause.u) &&
        !std::holds_alternative<clause::Nowait>(clause.u) &&
        !std::holds_alternative<clause::NumTeams>(clause.u) &&
        !std::holds_alternative<clause::NumThreads>(clause.u) &&
        !std::holds_alternative<clause::OmpxBare>(clause.u) &&
        !std::holds_alternative<clause::Priority>(clause.u) &&
        !std::holds_alternative<clause::Private>(clause.u) &&
        !std::holds_alternative<clause::ProcBind>(clause.u) &&
        !std::holds_alternative<clause::Reduction>(clause.u) &&
        !std::holds_alternative<clause::Shared>(clause.u) &&
        !std::holds_alternative<clause::Simd>(clause.u) &&
        !std::holds_alternative<clause::ThreadLimit>(clause.u) &&
        !std::holds_alternative<clause::Threads>(clause.u) &&
        !std::holds_alternative<clause::UseDeviceAddr>(clause.u) &&
        !std::holds_alternative<clause::UseDevicePtr>(clause.u) &&
        !std::holds_alternative<clause::InReduction>(clause.u) &&
        !std::holds_alternative<clause::Mergeable>(clause.u) &&
        !std::holds_alternative<clause::Untied>(clause.u) &&
        !std::holds_alternative<clause::TaskReduction>(clause.u) &&
        !std::holds_alternative<clause::Detach>(clause.u)) {
      std::string name =
          parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(clause.id));
      TODO(clauseLocation, name + " clause is not implemented yet");
    }
  }

  llvm::omp::Directive directive =
      std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
  const parser::CharBlock &source =
      std::get<parser::OmpBlockDirective>(beginBlockDirective.t).source;
  ConstructQueue queue{
      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
                          eval, source, directive, clauses)};
  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                 queue.begin());
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPAssumeConstruct &assumeConstruct) {
  mlir::Location clauseLocation =
      converter.genLocation(assumeConstruct.source);
  TODO(clauseLocation, "OpenMP ASSUME construct");
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPCriticalConstruct &criticalConstruct) {
  const auto &cd =
      std::get<parser::OmpCriticalDirective>(criticalConstruct.t);
  List<Clause> clauses =
      makeClauses(std::get<parser::OmpClauseList>(cd.t), semaCtx);

  ConstructQueue queue{buildConstructQueue(
      converter.getFirOpBuilder().getModule(), semaCtx, eval, cd.source,
      llvm::omp::Directive::OMPD_critical, clauses)};

  const auto &name = std::get<std::optional<parser::Name>>(cd.t);
  mlir::Location currentLocation = converter.getCurrentLocation();
  genCriticalOp(converter, symTable, semaCtx, eval, currentLocation, queue,
                queue.begin(), name);
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPUtilityConstruct &) {
  TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct");
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPDispatchConstruct &) {
  TODO(converter.getCurrentLocation(), "OpenMPDispatchConstruct");
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPExecutableAllocate &execAllocConstruct) {
  TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate");
}
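// Note that for loop constructs the clause lists of the begin and end
// directives are concatenated before the construct queue is built, so that,
// e.g. (hypothetical input):
//
//   !$omp do
//   ...
//   !$omp end do nowait
//
// carries NOWAIT into clause processing just as if it had been written on
// the begin directive.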
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPLoopConstruct &loopConstruct) {
  const auto &beginLoopDirective =
      std::get<parser::OmpBeginLoopDirective>(loopConstruct.t);
  List<Clause> clauses = makeClauses(
      std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
  if (auto &endLoopDirective =
          std::get<std::optional<parser::OmpEndLoopDirective>>(
              loopConstruct.t)) {
    clauses.append(makeClauses(
        std::get<parser::OmpClauseList>(endLoopDirective->t), semaCtx));
  }

  mlir::Location currentLocation =
      converter.genLocation(beginLoopDirective.source);

  llvm::omp::Directive directive =
      std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
  const parser::CharBlock &source =
      std::get<parser::OmpLoopDirective>(beginLoopDirective.t).source;
  ConstructQueue queue{
      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
                          eval, source, directive, clauses)};
  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                 queue.begin());
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPSectionConstruct &sectionConstruct) {
  // Do nothing here. SECTION is lowered inside of the lowering for Sections.
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPSectionsConstruct &sectionsConstruct) {
  const auto &beginSectionsDirective =
      std::get<parser::OmpBeginSectionsDirective>(sectionsConstruct.t);
  List<Clause> clauses = makeClauses(
      std::get<parser::OmpClauseList>(beginSectionsDirective.t), semaCtx);
  const auto &endSectionsDirective =
      std::get<parser::OmpEndSectionsDirective>(sectionsConstruct.t);
  const auto &sectionBlocks =
      std::get<parser::OmpSectionBlocks>(sectionsConstruct.t);
  clauses.append(makeClauses(
      std::get<parser::OmpClauseList>(endSectionsDirective.t), semaCtx));
  mlir::Location currentLocation = converter.getCurrentLocation();

  llvm::omp::Directive directive =
      std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v;
  const parser::CharBlock &source =
      std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).source;
  ConstructQueue queue{
      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
                          eval, source, directive, clauses)};
  ConstructQueue::iterator next = queue.begin();
  // Generate the constructs that come first, e.g. PARALLEL for
  // PARALLEL SECTIONS.
  while (next != queue.end() &&
         next->id != llvm::omp::Directive::OMPD_sections) {
    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                   next);
    next = std::next(next);
  }

  // Call genSectionsOp directly (not via genOMPDispatch) so that we can add
  // the sectionBlocks argument.
  assert(next != queue.end());
  assert(next->id == llvm::omp::Directive::OMPD_sections);
  genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, queue,
                next, sectionBlocks);
  assert(std::next(next) == queue.end());
}

static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPConstruct &ompConstruct) {
  Fortran::common::visit(
      [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
      ompConstruct.u);
}

//===----------------------------------------------------------------------===//
// Public functions
//===----------------------------------------------------------------------===//

mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
                                                     mlir::Operation *op,
                                                     mlir::Location loc) {
  if (mlir::isa<mlir::omp::AtomicUpdateOp, mlir::omp::DeclareReductionOp,
                mlir::omp::LoopNestOp>(op))
    return builder.create<mlir::omp::YieldOp>(loc);
  return builder.create<mlir::omp::TerminatorOp>(loc);
}

void Fortran::lower::genOpenMPConstruct(lower::AbstractConverter &converter,
                                        lower::SymMap &symTable,
                                        semantics::SemanticsContext &semaCtx,
                                        lower::pft::Evaluation &eval,
                                        const parser::OpenMPConstruct &omp) {
  lower::SymMapScope scope(symTable);
  genOMP(converter, symTable, semaCtx, eval, omp);
}

void Fortran::lower::genOpenMPDeclarativeConstruct(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    const parser::OpenMPDeclarativeConstruct &omp) {
  genOMP(converter, symTable, semaCtx, eval, omp);
  genNestedEvaluations(converter, eval);
}

void Fortran::lower::genOpenMPSymbolProperties(
    lower::AbstractConverter &converter, const lower::pft::Variable &var) {
  assert(var.hasSymbol() && "Expecting Symbol");
  const semantics::Symbol &sym = var.getSymbol();

  if (sym.test(semantics::Symbol::Flag::OmpThreadprivate))
    lower::genThreadprivateOp(converter, var);

  if (sym.test(semantics::Symbol::Flag::OmpDeclareTarget))
    lower::genDeclareTargetIntGlobal(converter, var);
}

int64_t
Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) {
  for (const parser::OmpClause &clause : clauseList.v) {
    if (const auto &collapseClause =
            std::get_if<parser::OmpClause::Collapse>(&clause.u)) {
      const auto *expr = semantics::GetExpr(collapseClause->v);
      return evaluate::ToInt64(*expr).value();
    }
  }
  return 1;
}
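// For instance, given (hypothetical input):
//
//   !$omp do collapse(2)
//
// getCollapseValue returns 2, while a clause list without COLLAPSE yields the
// default of 1.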
void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter,
                                        const lower::pft::Variable &var) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  mlir::Location currentLocation = converter.getCurrentLocation();

  const semantics::Symbol &sym = var.getSymbol();
  mlir::Value symThreadprivateValue;
  if (const semantics::Symbol *common =
          semantics::FindCommonBlockContaining(sym.GetUltimate())) {
    mlir::Value commonValue = converter.getSymbolAddress(*common);
    if (mlir::isa<mlir::omp::ThreadprivateOp>(commonValue.getDefiningOp())) {
      // Generate ThreadprivateOp for a common block instead of its members
      // and only do it once for a common block.
      return;
    }
    // Generate ThreadprivateOp and rebind the common block.
    mlir::Value commonThreadprivateValue =
        firOpBuilder.create<mlir::omp::ThreadprivateOp>(
            currentLocation, commonValue.getType(), commonValue);
    converter.bindSymbol(*common, commonThreadprivateValue);
    // Generate the threadprivate value for the common block member.
    symThreadprivateValue = genCommonBlockMember(converter, currentLocation,
                                                 sym, commonThreadprivateValue);
  } else if (!var.isGlobal()) {
    // A non-global variable that appears in a threadprivate directive must be
    // a variable in the main program, which has the implicit SAVE attribute.
    // Treat it as if it had the SAVE attribute and create a GlobalOp for it,
    // to simplify the translation to LLVM IR. This also avoids performing
    // multiple global initializations.
    fir::GlobalOp global;
    auto module = converter.getModuleOp();
    std::string globalName = converter.mangleName(sym);
    if (module.lookupSymbol<fir::GlobalOp>(globalName))
      global = module.lookupSymbol<fir::GlobalOp>(globalName);
    else
      global = globalInitialization(converter, firOpBuilder, sym, var,
                                    currentLocation);

    mlir::Value symValue = firOpBuilder.create<fir::AddrOfOp>(
        currentLocation, global.resultType(), global.getSymbol());
    symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
        currentLocation, symValue.getType(), symValue);
  } else {
    mlir::Value symValue = converter.getSymbolAddress(sym);

    // The symbol may be use-associated multiple times, and nothing needs to
    // be done after the original symbol is mapped to the threadprivatized
    // value for the first time. Use the threadprivatized value directly.
    mlir::Operation *op;
    if (auto declOp = symValue.getDefiningOp<hlfir::DeclareOp>())
      op = declOp.getMemref().getDefiningOp();
    else
      op = symValue.getDefiningOp();
    if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
      return;

    symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
        currentLocation, symValue.getType(), symValue);
  }

  fir::ExtendedValue sexv = converter.getSymbolExtendedValue(sym);
  fir::ExtendedValue symThreadprivateExv =
      getExtendedValue(sexv, symThreadprivateValue);
  converter.bindSymbol(sym, symThreadprivateExv);
}

// This function replicates threadprivate's behaviour of generating an
// internal fir.GlobalOp for non-global variables in the main program that
// have the implicit SAVE attribute, to simplify LLVM IR and MLIR generation.
void Fortran::lower::genDeclareTargetIntGlobal(
    lower::AbstractConverter &converter, const lower::pft::Variable &var) {
  if (!var.isGlobal()) {
    // A non-global variable which can be in a declare target directive must
    // be a variable in the main program, and it has the implicit SAVE
    // attribute. We create a GlobalOp for it to simplify the translation to
    // LLVM IR.
    globalInitialization(converter, converter.getFirOpBuilder(),
                         var.getSymbol(), var, converter.getCurrentLocation());
  }
}

bool Fortran::lower::isOpenMPTargetConstruct(
    const parser::OpenMPConstruct &omp) {
  llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown;
  if (const auto *block = std::get_if<parser::OpenMPBlockConstruct>(&omp.u)) {
    const auto &begin = std::get<parser::OmpBeginBlockDirective>(block->t);
    dir = std::get<parser::OmpBlockDirective>(begin.t).v;
  } else if (const auto *loop =
                 std::get_if<parser::OpenMPLoopConstruct>(&omp.u)) {
    const auto &begin = std::get<parser::OmpBeginLoopDirective>(loop->t);
    dir = std::get<parser::OmpLoopDirective>(begin.t).v;
  }
  return llvm::omp::allTargetSet.test(dir);
}

void Fortran::lower::gatherOpenMPDeferredDeclareTargets(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval,
    const parser::OpenMPDeclarativeConstruct &ompDecl,
    llvm::SmallVectorImpl<lower::OMPDeferredDeclareTargetInfo>
        &deferredDeclareTarget) {
  Fortran::common::visit(
      common::visitors{
          [&](const parser::OpenMPDeclareTargetConstruct &ompReq) {
            collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq,
                                          deferredDeclareTarget);
          },
          [&](const auto &) {},
      },
      ompDecl.u);
}

bool Fortran::lower::isOpenMPDeviceDeclareTarget(
    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
    lower::pft::Evaluation &eval,
    const parser::OpenMPDeclarativeConstruct &ompDecl) {
  return Fortran::common::visit(
      common::visitors{
          [&](const parser::OpenMPDeclareTargetConstruct &ompReq) {
            mlir::omp::DeclareTargetDeviceType targetType =
                getDeclareTargetFunctionDevice(converter, semaCtx, eval,
                                               ompReq)
                    .value_or(mlir::omp::DeclareTargetDeviceType::host);
            return targetType != mlir::omp::DeclareTargetDeviceType::host;
          },
          [&](const auto &) { return false; },
      },
      ompDecl.u);
}

// In certain cases, such as subroutine or function interfaces that declare
// but do not define or directly call the subroutine or function in the same
// module, their lowering is delayed until after the declare target construct
// itself is processed, so their symbol is not within the table.
//
// This function will also return true if we encounter any device declare
// target cases, to satisfy checking whether we require the requires
// attributes on the module.
bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions(
    mlir::Operation *mod,
    llvm::SmallVectorImpl<lower::OMPDeferredDeclareTargetInfo>
        &deferredDeclareTargets,
    AbstractConverter &converter) {
  bool deviceCodeFound = false;
  auto modOp = llvm::cast<mlir::ModuleOp>(mod);
  for (auto declTar : deferredDeclareTargets) {
    mlir::Operation *op =
        modOp.lookupSymbol(converter.mangleName(declTar.sym));

    // Due to interfaces being optionally emitted on usage in a module, not
    // finding an operation at this point cannot be a hard error; we simply
    // ignore it for now.
    // TODO: Add semantic checks for detecting cases where an erroneous
    // (undefined) symbol has been supplied to a declare target clause.
    if (!op)
      continue;

    auto devType = declTar.declareTargetDeviceType;
    if (!deviceCodeFound &&
        devType != mlir::omp::DeclareTargetDeviceType::host)
      deviceCodeFound = true;

    markDeclareTarget(op, converter, declTar.declareTargetCaptureClause,
                      devType);
  }

  return deviceCodeFound;
}

void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
                                       const semantics::Symbol *symbol) {
  using MlirRequires = mlir::omp::ClauseRequires;
  using SemaRequires = semantics::WithOmpDeclarative::RequiresFlag;

  if (auto offloadMod =
          llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(mod)) {
    semantics::WithOmpDeclarative::RequiresFlags semaFlags;
    if (symbol) {
      common::visit(
          [&](const auto &details) {
            if constexpr (std::is_base_of_v<semantics::WithOmpDeclarative,
                                            std::decay_t<decltype(details)>>) {
              if (details.has_ompRequires())
                semaFlags = *details.ompRequires();
            }
          },
          symbol->details());
    }

    // Use the pre-populated omp.requires module attribute if it was set, so
    // that the "-fopenmp-force-usm" compiler option is honored.
    MlirRequires mlirFlags = offloadMod.getRequires();
    if (semaFlags.test(SemaRequires::ReverseOffload))
      mlirFlags = mlirFlags | MlirRequires::reverse_offload;
    if (semaFlags.test(SemaRequires::UnifiedAddress))
      mlirFlags = mlirFlags | MlirRequires::unified_address;
    if (semaFlags.test(SemaRequires::UnifiedSharedMemory))
      mlirFlags = mlirFlags | MlirRequires::unified_shared_memory;
    if (semaFlags.test(SemaRequires::DynamicAllocators))
      mlirFlags = mlirFlags | MlirRequires::dynamic_allocators;

    offloadMod.setRequires(mlirFlags);
  }
}