//===-- Utils..cpp ----------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ // //===----------------------------------------------------------------------===// #include "Utils.h" #include "ClauseFinder.h" #include "flang/Evaluate/fold.h" #include "flang/Evaluate/tools.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include template Fortran::semantics::MaybeIntExpr EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { if (Fortran::semantics::MaybeExpr maybeExpr{ Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { if (auto *intExpr{ Fortran::evaluate::UnwrapExpr( *maybeExpr)}) { return std::move(*intExpr); } } return std::nullopt; } template std::optional EvaluateInt64(Fortran::semantics::SemanticsContext &context, const T &expr) { return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); } llvm::cl::opt treatIndexAsSection( "openmp-treat-index-as-section", llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."), llvm::cl::init(true)); namespace Fortran { namespace lower { namespace omp { bool requiresImplicitDefaultDeclareMapper( const semantics::DerivedTypeSpec &typeSpec) { // ISO C interoperable types (e.g., c_ptr, c_funptr) must always have implicit // default mappers available so that OpenMP offloading can correctly map them. 
if (semantics::IsIsoCType(&typeSpec)) return true; llvm::SmallPtrSet visited; std::function requiresMapper = [&](const semantics::DerivedTypeSpec &spec) -> bool { if (!visited.insert(&spec).second) return false; semantics::DirectComponentIterator directComponents{spec}; for (const semantics::Symbol &component : directComponents) { if (component.attrs().test(semantics::Attr::ALLOCATABLE)) return true; if (const semantics::DeclTypeSpec *declType = component.GetType()) if (const auto *nested = declType->AsDerived()) if (requiresMapper(*nested)) return true; } return false; }; return requiresMapper(typeSpec); } int64_t getCollapseValue(const List &clauses) { auto iter = llvm::find_if(clauses, [](const Clause &clause) { return clause.id == llvm::omp::Clause::OMPC_collapse; }); if (iter != clauses.end()) { const auto &collapse = std::get(iter->u); return evaluate::ToInt64(collapse.v).value(); } return 1; } void genObjectList(const ObjectList &objects, lower::AbstractConverter &converter, llvm::SmallVectorImpl &operands) { for (const Object &object : objects) { const semantics::Symbol *sym = object.sym(); assert(sym && "Expected Symbol"); if (mlir::Value variable = converter.getSymbolAddress(*sym)) { operands.push_back(variable); } else if (const auto *details = sym->detailsIf()) { operands.push_back(converter.getSymbolAddress(details->symbol())); converter.copySymbolBinding(details->symbol(), *sym); } } } mlir::Type getLoopVarType(lower::AbstractConverter &converter, std::size_t loopVarTypeSize) { // OpenMP runtime requires 32-bit or 64-bit loop variables. 
loopVarTypeSize = loopVarTypeSize * 8; if (loopVarTypeSize < 32) { loopVarTypeSize = 32; } else if (loopVarTypeSize > 64) { loopVarTypeSize = 64; mlir::emitWarning(converter.getCurrentLocation(), "OpenMP loop iteration variable cannot have more than 64 " "bits size and will be narrowed into 64 bits."); } assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && "OpenMP loop iteration variable size must be transformed into 32-bit " "or 64-bit"); return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); } semantics::Symbol * getIterationVariableSymbol(const lower::pft::Evaluation &eval) { return eval.visit(common::visitors{ [&](const parser::DoConstruct &doLoop) { if (const auto &maybeCtrl = doLoop.GetLoopControl()) { using LoopControl = parser::LoopControl; if (auto *bounds = std::get_if(&maybeCtrl->u)) { using NameType = llvm::remove_cvref_tName())>; static_assert( std::is_same_v>); return bounds->Name().thing.symbol; } } return static_cast(nullptr); }, [](auto &&) { return static_cast(nullptr); }, }); } void gatherFuncAndVarSyms( const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause, llvm::SmallVectorImpl &symbolAndClause, bool automap) { for (const Object &object : objects) symbolAndClause.emplace_back(clause, *object.sym(), automap); } // This function gathers the individual omp::Object's that make up a // larger omp::Object symbol. // // For example, provided the larger symbol: "parent%child%member", this // function breaks it up into its constituent components ("parent", // "child", "member"), so we can access each individual component and // introspect details. Important to note is this function breaks it up from // RHS to LHS ("member" to "parent") and then we reverse it so that the // returned omp::ObjectList is LHS to RHS, with the "parent" at the // beginning. 
omp::ObjectList gatherObjectsOf(omp::Object derivedTypeMember, semantics::SemanticsContext &semaCtx) { omp::ObjectList objList; std::optional baseObj = derivedTypeMember; while (baseObj.has_value()) { objList.push_back(baseObj.value()); baseObj = getBaseObject(baseObj.value(), semaCtx); } return omp::ObjectList{llvm::reverse(objList)}; } // This function generates a series of indices from a provided omp::Object, // that devolves to an ArrayRef symbol, e.g. "array(2,3,4)", this function // would generate a series of indices of "[1][2][3]" for the above example, // offsetting by -1 to account for the non-zero fortran indexes. // // These indices can then be provided to a coordinate operation or other // GEP-like operation to access the relevant positional member of the // array. // // It is of note that the function only supports subscript integers currently // and not Triplets i.e. Array(1:2:3). static void generateArrayIndices(lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder, lower::StatementContext &stmtCtx, mlir::Location clauseLocation, llvm::SmallVectorImpl &indices, omp::Object object) { auto maybeRef = evaluate::ExtractDataRef(*object.ref()); if (!maybeRef) return; auto *arr = std::get_if(&maybeRef->u); if (!arr) return; for (auto v : arr->subscript()) { if (std::holds_alternative(v.u)) TODO(clauseLocation, "Triplet indexing in map clause is unsupported"); auto expr = std::get(v.u); mlir::Value subscript = fir::getBase(converter.genExprValue(toEvExpr(expr.value()), stmtCtx)); indices.push_back(firOpBuilder.createConvert( clauseLocation, firOpBuilder.getIndexType(), subscript)); } } /// When mapping members of derived types, there is a chance that one of the /// members along the way to a mapped member is an descriptor. In which case /// we have to make sure we generate a map for those along the way otherwise /// we will be missing a chunk of data required to actually map the member /// type to device. 
This function effectively generates these maps and the /// appropriate data accesses required to generate these maps. It will avoid /// creating duplicate maps, as duplicates are just as bad as unmapped /// descriptor data in a lot of cases for the runtime (and unnecessary /// data movement should be avoided where possible). /// /// As an example for the following mapping: /// /// type :: vertexes /// integer(4), allocatable :: vertexx(:) /// integer(4), allocatable :: vertexy(:) /// end type vertexes /// /// type :: dtype /// real(4) :: i /// type(vertexes), allocatable :: vertexes(:) /// end type dtype /// /// type(dtype), allocatable :: alloca_dtype /// /// !$omp target map(tofrom: alloca_dtype%vertexes(N1)%vertexx) /// /// The below HLFIR/FIR is generated (trimmed for conciseness): /// /// On the first iteration we index into the record type alloca_dtype /// to access "vertexes", we then generate a map for this descriptor /// alongside bounds to indicate we only need the 1 member, rather than /// the whole array block in this case (In theory we could map its /// entirety at the cost of data transfer bandwidth). /// /// %13:2 = hlfir.declare ... "alloca_dtype" ... /// %39 = fir.load %13#0 : ... /// %40 = fir.coordinate_of %39, %c1 : ... /// %51 = omp.map.info var_ptr(%40 : ...) map_clauses(to) capture(ByRef) ... /// %52 = fir.load %40 : ... /// /// Second iteration generating access to "vertexes(N1) utilising the N1 index /// %53 = load N1 ... /// %54 = fir.convert %53 : (i32) -> i64 /// %55 = fir.convert %54 : (i64) -> index /// %56 = arith.subi %55, %c1 : index /// %57 = fir.coordinate_of %52, %56 : ... /// /// Still in the second iteration we access the allocatable member "vertexx", /// we return %58 from the function and provide it to the final and "main" /// map of processMap (generated by the record type segment of the below /// function), if this were not the final symbol in the list, i.e. 
we accessed /// a member below vertexx, we would have generated the map below as we did in /// the first iteration and then continue to generate further coordinates to /// access further components as required. /// /// %58 = fir.coordinate_of %57, %c0 : ... /// %61 = omp.map.info var_ptr(%58 : ...) map_clauses(to) capture(ByRef) ... /// /// Parent mapping containing prior generated mapped members, generated at /// a later step but here to showcase the "end" result /// /// omp.map.info var_ptr(%13#1 : ...) map_clauses(to) capture(ByRef) /// members(%50, %61 : [0, 1, 0], [0, 1, 0] : ... /// /// \param objectList - The list of omp::Object symbol data for each parent /// to the mapped member (also includes the mapped member), generated via /// gatherObjectsOf. /// \param indices - List of index data associated with the mapped member /// symbol, which identifies the placement of the member in its parent, /// this helps generate the appropriate member accesses. These indices /// can be generated via generateMemberPlacementIndices. /// \param asFortran - A string generated from the mapped variable to be /// associated with the main map, generally (but not restricted to) /// generated via gatherDataOperandAddrAndBounds or other /// DirectiveCommons.hpp utilities. /// \param mapTypeBits - The map flags that will be associated with the /// generated maps, minus alterations of the TO and FROM bits for the /// intermediate components to prevent accidental overwriting on device /// write back. 
mlir::Value createParentSymAndGenIntermediateMaps( mlir::Location clauseLocation, lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, omp::ObjectList &objectList, llvm::SmallVectorImpl &indices, OmpMapParentAndMemberData &parentMemberIndices, llvm::StringRef asFortran, mlir::omp::ClauseMapFlags mapTypeBits) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); /// Checks if an omp::Object is an array expression with a subscript, e.g. /// array(1,2). auto isArrayExprWithSubscript = [](omp::Object obj) { if (auto maybeRef = evaluate::ExtractDataRef(obj.ref())) { evaluate::DataRef ref = *maybeRef; if (auto *arr = std::get_if(&ref.u)) return !arr->subscript().empty(); } return false; }; // Generate the access to the original parent base address. fir::factory::AddrAndBoundsInfo parentBaseAddr = lower::getDataOperandBaseAddr(converter, firOpBuilder, *objectList[0].sym(), clauseLocation); mlir::Value curValue = parentBaseAddr.addr; // Iterate over all objects in the objectList, this should consist of all // record types between the parent and the member being mapped (including // the parent). The object list may also contain array objects as well, // this can occur when specifying bounds or a specific element access // within a member map, we skip these. size_t currentIndicesIdx = 0; for (size_t i = 0; i < objectList.size(); ++i) { // If we encounter a sequence type, i.e. an array, we must generate the // correct coordinate operation to index into the array to proceed further, // this is only relevant in cases where we encounter subscripts currently. // // For example in the following case: // // map(tofrom: array_dtype(4)%internal_dtypes(3)%float_elements(4)) // // We must generate coordinate operation accesses for each subscript // we encounter. 
if (fir::SequenceType arrType = mlir::dyn_cast( fir::unwrapPassByRefType(curValue.getType()))) { if (isArrayExprWithSubscript(objectList[i])) { llvm::SmallVector subscriptIndices; generateArrayIndices(converter, firOpBuilder, stmtCtx, clauseLocation, subscriptIndices, objectList[i]); assert(!subscriptIndices.empty() && "missing expected indices for map clause"); if (auto boxTy = llvm::dyn_cast(curValue.getType())) { // To accommodate indexing into box types of all dimensions including // negative dimensions we have to take into consideration the lower // bounds and extents of the data (stored in the box) and convey it // to the ArrayCoorOp so that it can appropriately access the element // utilising the subscript we provide and the runtime sizes stored in // the Box. To do so we need to generate a ShapeShiftOp which combines // both the lb (ShiftOp) and extent (ShapeOp) of the Box, giving the // ArrayCoorOp the spatial information it needs to calculate the // underlying address. mlir::Value shapeShift = Fortran::lower::getShapeShift( firOpBuilder, clauseLocation, curValue); auto addrOp = fir::BoxAddrOp::create(firOpBuilder, clauseLocation, curValue); curValue = fir::ArrayCoorOp::create( firOpBuilder, clauseLocation, firOpBuilder.getRefType(arrType.getEleTy()), addrOp, shapeShift, /*slice=*/mlir::Value{}, subscriptIndices, /*typeparms=*/mlir::ValueRange{}); } else { // We're required to negate by one in the non-Box case as I believe // we do not have the shape generated from the dimensions to help // adjust the indexing. // TODO/FIXME: This may need adjusted to support bounds of unusual // dimensions, if that's the case then it is likely best to fold this // branch into the above. 
mlir::Value one = firOpBuilder.createIntegerConstant( clauseLocation, firOpBuilder.getIndexType(), 1); for (auto &v : subscriptIndices) v = mlir::arith::SubIOp::create(firOpBuilder, clauseLocation, v, one); curValue = fir::CoordinateOp::create( firOpBuilder, clauseLocation, firOpBuilder.getRefType(arrType.getEleTy()), curValue, subscriptIndices); } } } // If we encounter a record type, we must access the subsequent member // by indexing into it and creating a coordinate operation to do so, we // utilise the index information generated previously and passed in to // work out the correct member to access and the corresponding member // type. if (fir::RecordType recordType = mlir::dyn_cast( fir::unwrapPassByRefType(curValue.getType()))) { fir::IntOrValue idxConst = mlir::IntegerAttr::get( firOpBuilder.getI32Type(), indices[currentIndicesIdx]); mlir::Type memberTy = recordType.getType(indices[currentIndicesIdx]); curValue = fir::CoordinateOp::create( firOpBuilder, clauseLocation, firOpBuilder.getRefType(memberTy), curValue, llvm::SmallVector{idxConst}); // If we're a final member, the map will be generated by the processMap // call that invoked this function. if (currentIndicesIdx == indices.size() - 1) break; // Skip mapping and the subsequent load if we're not // a type with a descriptor such as a pointer/allocatable. If we're not a // type with a descriptor then we have no need of generating an // intermediate map for it, as we only need to generate a map if a member // is a descriptor type (and thus obscures the members it contains via a // pointer in which it's data needs mapped). 
if (!fir::isTypeWithDescriptor(memberTy)) { currentIndicesIdx++; continue; } llvm::SmallVector interimIndices( indices.begin(), std::next(indices.begin(), currentIndicesIdx + 1)); // Verify we haven't already created a map for this particular member, by // checking the list of members already mapped for the current parent, // stored in the parentMemberIndices structure if (!parentMemberIndices.isDuplicateMemberMapInfo(interimIndices)) { // Generate bounds operations using the standard lowering utility, // unfortunately this currently does a bit more than just generate // bounds and we discard the other bits. May be useful to extend the // utility to just provide bounds in the future. llvm::SmallVector interimBounds; if (i + 1 < objectList.size() && objectList[i + 1].sym()->IsObjectArray()) { std::stringstream interimFortran; Fortran::lower::gatherDataOperandAddrAndBounds< mlir::omp::MapBoundsOp, mlir::omp::MapBoundsType>( converter, converter.getFirOpBuilder(), semaCtx, converter.getFctCtx(), *objectList[i + 1].sym(), objectList[i + 1].ref(), clauseLocation, interimFortran, interimBounds, treatIndexAsSection); } // Remove all map-type bits (e.g. TO, FROM, etc.) from the intermediate // allocatable maps, as we simply wish to alloc or release them. It may // be safer to just pass OMP_MAP_NONE as the map type, but we may still // need some of the other map types the mapped member utilises, so for // now it's good to keep an eye on this. mlir::omp::ClauseMapFlags interimMapType = mapTypeBits; interimMapType &= ~mlir::omp::ClauseMapFlags::to; interimMapType &= ~mlir::omp::ClauseMapFlags::from; interimMapType &= ~mlir::omp::ClauseMapFlags::return_param; // Create a map for the intermediate member and insert it and it's // indices into the parentMemberIndices list to track it. 
mlir::omp::MapInfoOp mapOp = utils::openmp::createMapInfoOp( firOpBuilder, clauseLocation, curValue, /*varPtrPtr=*/mlir::Value{}, asFortran, /*bounds=*/interimBounds, /*members=*/{}, /*membersIndex=*/mlir::ArrayAttr{}, interimMapType, mlir::omp::VariableCaptureKind::ByRef, curValue.getType()); parentMemberIndices.memberPlacementIndices.push_back(interimIndices); parentMemberIndices.memberMap.push_back(mapOp); } // Load the currently accessed member, so we can continue to access // further segments. curValue = fir::LoadOp::create(firOpBuilder, clauseLocation, curValue); currentIndicesIdx++; } } return curValue; } static int64_t getComponentPlacementInParent(const semantics::Symbol *componentSym) { const auto *derived = componentSym->owner() .derivedTypeSpec() ->typeSymbol() .detailsIf(); assert(derived && "expected derived type details when processing component symbol"); for (auto [placement, name] : llvm::enumerate(derived->componentNames())) if (name == componentSym->name()) return placement; return -1; } static std::optional getComponentObject(std::optional object, semantics::SemanticsContext &semaCtx) { if (!object) return std::nullopt; auto ref = evaluate::ExtractDataRef(object.value().ref()); if (!ref) return std::nullopt; if (std::holds_alternative(ref->u)) return object; auto baseObj = getBaseObject(object.value(), semaCtx); if (!baseObj) return std::nullopt; return getComponentObject(baseObj.value(), semaCtx); } void generateMemberPlacementIndices(const Object &object, llvm::SmallVectorImpl &indices, semantics::SemanticsContext &semaCtx) { assert(indices.empty() && "indices vector passed to " "generateMemberPlacementIndices should be empty"); auto compObj = getComponentObject(object, semaCtx); while (compObj) { int64_t index = getComponentPlacementInParent(compObj->sym()); assert( index >= 0 && "unexpected index value returned from getComponentPlacementInParent"); indices.push_back(index); compObj = getComponentObject(getBaseObject(compObj.value(), 
semaCtx), semaCtx); } indices = llvm::SmallVector{llvm::reverse(indices)}; } void OmpMapParentAndMemberData::addChildIndexAndMapToParent( const omp::Object &object, mlir::omp::MapInfoOp &mapOp, semantics::SemanticsContext &semaCtx) { llvm::SmallVector indices; generateMemberPlacementIndices(object, indices, semaCtx); memberPlacementIndices.push_back(indices); memberMap.push_back(mapOp); } bool isMemberOrParentAllocatableOrPointer( const Object &object, semantics::SemanticsContext &semaCtx) { if (semantics::IsAllocatableOrObjectPointer(object.sym())) return true; auto compObj = getBaseObject(object, semaCtx); while (compObj) { if (semantics::IsAllocatableOrObjectPointer(compObj.value().sym())) return true; compObj = getBaseObject(compObj.value(), semaCtx); } return false; } void insertChildMapInfoIntoParent( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, std::map &parentMemberIndices, llvm::SmallVectorImpl &mapOperands, llvm::SmallVectorImpl &mapSyms) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); for (auto indices : parentMemberIndices) { auto *parentIter = llvm::find_if(mapSyms, [&indices](const semantics::Symbol *v) { return v == indices.first.sym(); }); if (parentIter != mapSyms.end()) { auto mapOp = llvm::cast( mapOperands[std::distance(mapSyms.begin(), parentIter)] .getDefiningOp()); // Once explicit members are attached to a parent map, do not also invoke // a declare mapper on it, otherwise the mapper would remap the same // components leading to duplicate mappings at runtime. if (!indices.second.memberMap.empty() && mapOp.getMapperIdAttr()) mapOp.setMapperIdAttr(nullptr); // NOTE: To maintain appropriate SSA ordering, we move the parent map // which will now have references to its children after the last // of its members to be generated. This is necessary when a user // has defined a series of parent and children maps where the parent // precedes the children. 
An alternative, may be to do // delayed generation of map info operations from the clauses and // organize them first before generation. Or to use the // topologicalSort utility which will enforce a stronger SSA // dominance ordering at the cost of efficiency/time. mapOp->moveAfter(indices.second.memberMap.back()); for (mlir::omp::MapInfoOp memberMap : indices.second.memberMap) mapOp.getMembersMutable().append(memberMap.getResult()); mapOp.setMembersIndexAttr(firOpBuilder.create2DI64ArrayAttr( indices.second.memberPlacementIndices)); } else { // NOTE: We take the map type of the first child, this may not // be the correct thing to do, however, we shall see. For the moment // it allows this to work with enter and exit without causing MLIR // verification issues. The more appropriate thing may be to take // the "main" map type clause from the directive being used. mlir::omp::ClauseMapFlags mapType = indices.second.memberMap[0].getMapType(); llvm::SmallVector members; members.reserve(indices.second.memberMap.size()); for (mlir::omp::MapInfoOp memberMap : indices.second.memberMap) members.push_back(memberMap.getResult()); // Create parent to emplace and bind members llvm::SmallVector bounds; std::stringstream asFortran; fir::factory::AddrAndBoundsInfo info = lower::gatherDataOperandAddrAndBounds( converter, firOpBuilder, semaCtx, converter.getFctCtx(), *indices.first.sym(), indices.first.ref(), converter.getCurrentLocation(), asFortran, bounds, treatIndexAsSection); mlir::omp::MapInfoOp mapOp = utils::openmp::createMapInfoOp( firOpBuilder, info.rawInput.getLoc(), info.rawInput, /*varPtrPtr=*/mlir::Value(), asFortran.str(), bounds, members, firOpBuilder.create2DI64ArrayAttr( indices.second.memberPlacementIndices), mapType, mlir::omp::VariableCaptureKind::ByRef, info.rawInput.getType(), /*partialMap=*/true); mapOperands.push_back(mapOp); mapSyms.push_back(indices.first.sym()); } } } void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, 
mlir::Location loc) { using Lastprivate = omp::clause::Lastprivate; auto &maybeMod = std::get>(lastp.t); if (maybeMod) { assert(*maybeMod == Lastprivate::LastprivateModifier::Conditional && "Unexpected lastprivate modifier"); TODO(loc, "lastprivate clause with CONDITIONAL modifier"); } } static void convertLoopBounds(lower::AbstractConverter &converter, mlir::Location loc, mlir::omp::LoopRelatedClauseOps &result, std::size_t loopVarTypeSize) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // The types of lower bound, upper bound, and step are converted into the // type of the loop variable if necessary. mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); for (unsigned it = 0; it < (unsigned)result.loopLowerBounds.size(); it++) { result.loopLowerBounds[it] = firOpBuilder.createConvert( loc, loopVarType, result.loopLowerBounds[it]); result.loopUpperBounds[it] = firOpBuilder.createConvert( loc, loopVarType, result.loopUpperBounds[it]); result.loopSteps[it] = firOpBuilder.createConvert(loc, loopVarType, result.loopSteps[it]); } } // Helper function that finds the sizes clause in a inner OMPD_tile directive // and passes the sizes clause to the callback function if found. static void processTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, std::function processFun) { if (!ompCons) return; if (auto *ompLoop{std::get_if(&ompCons->u)}) { if (auto *innerConstruct = ompLoop->GetNestedConstruct()) { const parser::OmpDirectiveSpecification &innerBeginSpec = innerConstruct->BeginDir(); if (innerBeginSpec.DirId() == llvm::omp::Directive::OMPD_tile) { // Get the size values from parse tree and convert to a vector. 
if (auto *clause = parser::omp::FindClause( innerBeginSpec, llvm::omp::Clause::OMPC_sizes)) processFun(&std::get(clause->u)); } } } } pft::Evaluation *getNestedDoConstruct(pft::Evaluation &eval) { for (pft::Evaluation &nested : eval.getNestedEvaluations()) { // In an OpenMPConstruct there can be compiler directives: // 1 <> // 2 CompilerDirective: !unroll // <> -> 8 if (nested.getIf()) continue; // Within a DoConstruct, there can be compiler directives, plus // there is a DoStmt before the body: // <> -> 8 // 3 NonLabelDoStmt -> 7: do i = 1, n // <> -> 7 if (nested.getIf()) continue; assert(nested.getIf() && "Unexpected construct in the nested evaluations"); return &nested; } llvm_unreachable("Expected do loop to be in the nested evaluations"); } /// Populates the sizes vector with values if the given OpenMPConstruct /// contains a loop construct with an inner tiling construct. void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, llvm::SmallVectorImpl &tileSizes, Fortran::semantics::SemanticsContext &semaCtx) { processTileSizesFromOpenMPConstruct( ompCons, [&](const parser::OmpClause::Sizes *tclause) { for (auto &tval : tclause->v) if (const auto v{EvaluateInt64(semaCtx, tval)}) tileSizes.push_back(*v); }); } int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, lower::pft::Evaluation *nestedEval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { int64_t numCollapse = 1; // Collect the loops to collapse. 
lower::pft::Evaluation *doConstructEval = nestedEval; if (doConstructEval->getIf()->IsDoConcurrent()) { TODO(currentLocation, "Do Concurrent in Worksharing loop construct"); } std::int64_t collapseValue = 1l; if (auto *clause = ClauseFinder::findUniqueClause(clauses)) { collapseValue = evaluate::ToInt64(clause->v).value(); numCollapse = collapseValue; } collectLoopRelatedInfo(converter, currentLocation, eval, nestedEval, numCollapse, result, iv); return numCollapse; } void collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, lower::pft::Evaluation *nestedEval, int64_t numCollapse, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Collect the loops to collapse. lower::pft::Evaluation *doConstructEval = nestedEval; if (doConstructEval->getIf()->IsDoConcurrent()) { TODO(currentLocation, "Do Concurrent in Worksharing loop construct"); } // Collect sizes from tile directive if present. 
std::int64_t sizesLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { processTileSizesFromOpenMPConstruct( ompCons, [&](const parser::OmpClause::Sizes *tclause) { sizesLengthValue = tclause->v.size(); }); } std::int64_t collapseValue = std::max(numCollapse, sizesLengthValue); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation(); auto *doStmt = doLoop->getIf(); assert(doStmt && "Expected do loop to be in the nested evaluation"); const auto &loopControl = std::get>(doStmt->t); const parser::LoopControl::Bounds *bounds = std::get_if(&loopControl->u); assert(bounds && "Expected bounds for worksharing do loop"); lower::StatementContext stmtCtx; result.loopLowerBounds.push_back(fir::getBase( converter.genExprValue(*semantics::GetExpr(bounds->Lower()), stmtCtx))); result.loopUpperBounds.push_back(fir::getBase( converter.genExprValue(*semantics::GetExpr(bounds->Upper()), stmtCtx))); if (auto &step = bounds->Step()) { result.loopSteps.push_back(fir::getBase( converter.genExprValue(*semantics::GetExpr(step), stmtCtx))); } else { // If `step` is not present, assume it as `1`. result.loopSteps.push_back(firOpBuilder.createIntegerConstant( currentLocation, firOpBuilder.getIntegerType(32), 1)); } iv.push_back(bounds->Name().thing.symbol); loopVarTypeSize = std::max( loopVarTypeSize, bounds->Name().thing.symbol->GetUltimate().size()); if (--collapseValue) doConstructEval = getNestedDoConstruct(*doConstructEval); } while (collapseValue > 0); convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); } // Lower an affinity object to the raw storage address. // The lowering paths feeding this helper are mixed: some produce HLFIR // entities such as hlfir.designate/hlfir.declare, while others already // produce raw FIR addresses such as fir.box_addr. Normalize entity-like values // to a raw address, and leave already-raw addresses unchanged. 
mlir::Value genAffinityAddr(Fortran::lower::AbstractConverter &converter, const omp::Object &object, Fortran::lower::StatementContext &stmtCtx, mlir::Location loc) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); auto genRawAddress = [&](mlir::Value v) -> mlir::Value { // Examples seen here include hlfir.designate for a(i), hlfir.declare for // whole objects like dummy/character arrays, fir.load of a pointer box, // and already-raw fir.box_addr results. Only the entity-like cases can be // wrapped as hlfir::Entity; the raw address cases must be returned as-is. if (!hlfir::isFortranEntity(v)) return v; hlfir::Entity entity{v}; // Pointer/allocatable entities need to be dereferenced first so affinity // uses the pointee storage rather than the box address. entity = hlfir::derefPointersAndAllocatables(loc, builder, entity); return hlfir::genVariableRawAddress(loc, builder, entity); }; // Designators such as affinity(a(3)) or affinity(a(1:10)) lower through // genExprAddr. The base may still be an HLFIR entity, or may already be a // raw FIR address after earlier lowering. if (auto expr = object.ref()) { fir::ExtendedValue exv = converter.genExprAddr(toEvExpr(*expr), stmtCtx, &loc); mlir::Value baseAddr = fir::getBase(exv); return genRawAddress(baseAddr); } // Whole objects such as affinity(a) come from the symbol address directly. const Fortran::semantics::Symbol *sym = object.sym(); assert(sym && "expected symbol in affinity object"); mlir::Value symAddr = converter.getSymbolAddress(*sym); return genRawAddress(symAddr); } // Compute the size in bytes of a single element described by an HLFIR entity. // This returns the per-element byte size only; callers handle any array extent // or section span separately. mlir::Value genElementSizeInBytes(fir::FirOpBuilder &builder, mlir::Location loc, const mlir::DataLayout &dl, hlfir::Entity entity) { // Boxed entities carry the runtime element size in the descriptor. 
if (entity.isBoxAddressOrValue()) return fir::ConvertOp::create( builder, loc, builder.getI64Type(), fir::BoxEleSizeOp::create(builder, loc, builder.getIndexType(), entity)); mlir::Type elemTy = entity.getFortranElementType(); if (auto charTy = mlir::dyn_cast(elemTy)) { // Non-box character entities expose length separately; multiply it by the // character kind byte width. mlir::Value charLen = hlfir::genCharLength(loc, builder, entity); mlir::Value charBytes = builder.createIntegerConstant( loc, builder.getI64Type(), charTy.getFKind()); return mlir::arith::MulIOp::create( builder, loc, fir::ConvertOp::create(builder, loc, builder.getI64Type(), charLen), charBytes); } // PDTs with length parameters and assumed-rank entities do not currently // have a precise byte size here, so keep the existing conservative 0. if (fir::isRecordWithTypeParameters(elemTy) || entity.isAssumedRank()) return builder.createIntegerConstant(loc, builder.getI64Type(), 0); // Trivial non-box entities have a fixed element size in the data layout. return builder.createIntegerConstant( loc, builder.getI64Type(), static_cast(dl.getTypeSize(elemTy))); } // Compute the total number of elements in a whole affinity object. static mlir::Value getTotalElements(fir::FirOpBuilder &builder, mlir::Location loc, hlfir::Entity entity) { if (entity.isAssumedRank()) return builder.createIntegerConstant(loc, builder.getI64Type(), 0); assert(!entity.isScalar() && "expected non-scalar entity to compute total elements"); mlir::Value total = builder.createIntegerConstant(loc, builder.getIndexType(), 1); for (mlir::Value extent : hlfir::genExtentsVector(loc, builder, entity)) total = mlir::arith::MulIOp::create(builder, loc, total, extent); return fir::ConvertOp::create(builder, loc, builder.getI64Type(), total); } // Compute the contiguous element span covered by an array section. // This is not the number of selected elements. 
// Instead, it is the inclusive
// distance from the lowest addressed element in the section to the highest
// addressed element, using Fortran column-major layout. genAffinityLen later
// multiplies this span by the element size to get the byte length.
//
// For each dimension d:
//   delta_d = upper_d - lower_d
//   distance_d = product(fullExtents[0..d-1])
// with distance_0 = 1.
//
// The returned value is 1 + sum over d of (delta_d * distance_d), converted
// to i64.
//
// Example:
//   integer :: a(5, 7)
//   !$omp task affinity(a(2:4, 3:5))
// The section selects 9 elements, but its contiguous span runs from a(2,3) to
// a(4,5). In linearized column-major indices, those are 11 and 23, so the
// span is 23 - 11 + 1 = 13 elements.
//
// Strides in the section bounds do not change this computation: the span still
// covers the full contiguous address range between the first and last element.
//
// `bounds` must be non-empty and have one entry per extent of `entity`; both
// conditions are asserted. Each entry must be the result of an op that
// provides getLowerBound()/getUpperBound() (the assertion message names
// omp.map_bounds).
static mlir::Value computeBoundsSpan(fir::FirOpBuilder &builder,
                                     mlir::Location loc,
                                     llvm::ArrayRef bounds,
                                     hlfir::Entity entity) {
  assert(!bounds.empty() && "expected non-empty bounds to compute span");
  auto fullExtents = hlfir::genExtentsVector(loc, builder, entity);
  assert(fullExtents.size() == bounds.size() &&
         "expected bounds and full extents to have the same size");
  mlir::Value one =
      builder.createIntegerConstant(loc, builder.getIndexType(), 1);
  mlir::Value span = one;     // inclusive: +1
  mlir::Value distance = one; // column-major linearization factor
  for (auto [b, extent] : llvm::zip(bounds, fullExtents)) {
    auto mb = b.getDefiningOp();
    assert(mb && "expected omp.map_bounds for affinity section span");
    // NOTE(review): both bounds are used without a null check; presumably
    // every bounds op reaching this point carries a lower and an upper bound -
    // confirm against the callers.
    mlir::Value delta = mlir::arith::SubIOp::create(
        builder, loc, mb.getUpperBound(), mb.getLowerBound());
    // Add this dimension's contribution, scaled by the number of elements
    // spanned by one step in this dimension.
    span = mlir::arith::AddIOp::create(
        builder, loc, span,
        mlir::arith::MulIOp::create(builder, loc, delta, distance));
    // The next dimension's step covers one full extent of this dimension.
    distance = mlir::arith::MulIOp::create(builder, loc, distance, extent);
  }
  // Convert from index to i64 (bounds are in index type)
  return fir::ConvertOp::create(builder, loc, builder.getI64Type(), span);
}

// Compute the byte length covered by an affinity
object. // For a scalar or single element, this is the element size. For a section, it // is the span of the section in elements multiplied by the element size. For a // whole array object, it is the total number of elements multiplied by the // element size. mlir::Value genAffinityLen(fir::FirOpBuilder &builder, mlir::Location loc, const mlir::DataLayout &dl, hlfir::Entity entity, llvm::ArrayRef bounds) { mlir::Value elemBytes = genElementSizeInBytes(builder, loc, dl, entity); // Scalar entities and single designated elements contribute exactly one // element to the affinity object. if (entity.isScalar()) return elemBytes; if (!bounds.empty()) { // Array sections carry explicit bounds describing the covered span. mlir::Value spanElems = computeBoundsSpan(builder, loc, bounds, entity); return mlir::arith::MulIOp::create(builder, loc, spanElems, elemBytes); } // Whole-array objects have no explicit bounds here, so use the extents of // the entity itself. return mlir::arith::MulIOp::create( builder, loc, getTotalElements(builder, loc, entity), elemBytes); } bool hasIteratorIVReference( const omp::Object &object, const llvm::SmallPtrSetImpl &ivSyms) { auto ref = object.ref(); if (!ref) return false; Fortran::lower::SomeExpr expr = toEvExpr(*ref); for (Fortran::evaluate::SymbolRef s : CollectSymbols(expr)) { const Fortran::semantics::Symbol &ult = s->GetUltimate(); if (ivSyms.contains(&ult)) return true; } return false; } void defaultMangler(Fortran::lower::AbstractConverter &converter, std::string &mapperIdName, llvm::StringRef memberName) { if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) mapperIdName = converter.mangleName(mapperIdName, sym->owner()); else if (auto *memberSym = converter.getCurrentScope().FindSymbol(memberName.str())) mapperIdName = converter.mangleName(mapperIdName, memberSym->owner()); } // Build the array coordinate for an object that uses iterator variables. 
// If the object is a section, use the first element of that section
// as the coordinate. Currently only support top-level ArrayRef designators.
//
// Examples:
//   a(i, j)       -> coordinates for a(i, j)
//   a(i:i+1, j+2) -> coordinates for a(i, j+2)
//
// Returns one lowered index value per subscript, in subscript order.
// Returns std::nullopt when:
//   - no data reference can be extracted from the designator;
//   - the data reference is not an array reference, or has no subscripts;
//   - a non-triplet subscript has rank > 0 (vector subscript).
// The object is expected to have both a designator and a symbol (asserted).
std::optional> getIteratorElementIndices(
    Fortran::lower::AbstractConverter &converter, const omp::Object &object,
    Fortran::lower::StatementContext &stmtCtx, mlir::Location loc) {
  const std::optional &ref = object.ref();
  assert(ref && "expected iterator-dependent object to have a reference");
  std::optional dataRef = Fortran::evaluate::ExtractDataRef(*ref);
  if (!dataRef)
    return std::nullopt;
  const auto *arrayRef = std::get_if(&dataRef->u);
  if (!arrayRef || arrayRef->subscript().empty())
    return std::nullopt;
  auto &builder = converter.getFirOpBuilder();
  const Fortran::semantics::Symbol *sym = object.sym();
  assert(sym && "expected symbol for iterator-dependent object");
  // The symbol's extended value is only consulted below to read a lower bound
  // when a triplet omits it.
  fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(*sym);
  mlir::Value one =
      builder.createIntegerConstant(loc, builder.getIndexType(), 1);
  llvm::SmallVector indices;
  indices.reserve(arrayRef->subscript().size());
  for (const auto &[dim, subscript] : llvm::enumerate(arrayRef->subscript())) {
    mlir::Value idx;
    if (const auto *triplet = std::get_if(&subscript.u)) {
      // Sections use the first element of the section as the base address, so
      // the coordinate for this dimension comes from the triplet lower bound.
      std::optional<Fortran::evaluate::Expr> lowerBound = triplet->lower();
      if (!lowerBound) {
        // Get lower bound if not provided by user.
        // For example:
        //   !$omp task affinity(iterator(i = 1:n, j = 1:m) : a(:i+1, j+2))
        idx = fir::factory::readLowerBound(builder, loc, dataExv, dim, one);
      } else {
        idx = fir::getBase(
            createSomeExtendedExpression(loc, converter, toEvExpr(*lowerBound),
                                         converter.getSymbolMap(), stmtCtx));
      }
    } else {
      // Not handling vector subscripts for now.
      if (subscript.Rank() > 0)
        return std::nullopt;
      const auto *indirect = std::get_if(
          &subscript.u);
      assert(indirect && "expected non-triplet subscript");
      // Scalar subscripts, including reordered indices and expressions like
      // i+1 or j+2, lower directly through expression lowering.
      idx = fir::getBase(createSomeExtendedExpression(
          loc, converter, toEvExpr(indirect->value()),
          converter.getSymbolMap(), stmtCtx));
    }
    indices.push_back(idx);
  }
  return indices;
}

// Build the element address for an iterator-dependent affinity object from a
// base entity and lowered indices.
//
// The result is a fir.array_coor on the entity base, typed as a reference to
// the entity's Fortran element type. `ivs` supplies one index per dimension;
// its size must match the number of extents of `entity` (asserted). The shape
// operand carries lower bounds only when the entity may have non-default lower
// bounds. No slice and no type parameters are passed.
// NOTE(review): confirm that omitting type parameters is sufficient for every
// element type that can reach this function.
mlir::Value genIteratorCoordinate(Fortran::lower::AbstractConverter &converter,
                                  hlfir::Entity entity,
                                  llvm::ArrayRef ivs,
                                  mlir::Location loc) {
  auto &builder = converter.getFirOpBuilder();
  mlir::Value base = entity.getBase();
  // If base is a reference-to-box, load it so array_coor sees the box value
  if (auto refTy = mlir::dyn_cast(base.getType())) {
    if (mlir::isa(refTy.getEleTy()))
      base = fir::LoadOp::create(builder, loc, base);
  }
  // Build shape from the entity extents
  mlir::Value shape;
  auto extents = hlfir::genExtentsVector(loc, builder, entity);
  assert(extents.size() == ivs.size() &&
         "expected the number of extents and iteration variables to match for "
         "iterator");
  if (entity.mayHaveNonDefaultLowerBounds()) {
    // Collect one lower bound per dimension so the shape reflects them.
    llvm::SmallVector lowerBounds;
    lowerBounds.reserve(ivs.size());
    for (unsigned dim = 0; dim < ivs.size(); ++dim)
      lowerBounds.push_back(hlfir::genLBound(loc, builder, entity, dim));
    shape = builder.genShape(loc, lowerBounds, extents);
  } else {
    shape = fir::ShapeOp::create(builder, loc, extents);
  }
  mlir::Type elementToRefTy =
      fir::ReferenceType::get(entity.getFortranElementType());
  return fir::ArrayCoorOp::create(builder, loc, elementToRefTy,
                                  /*memref=*/base,
                                  /*shape=*/shape,
                                  /*slice=*/mlir::Value{},
                                  /*indices=*/ivs,
                                  /*typeparams=*/mlir::ValueRange{});
}
} // namespace omp
} // namespace lower
} // namespace Fortran