diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td index 704faf0ccd85..0bff58f0f639 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.td +++ b/flang/include/flang/Optimizer/OpenMP/Passes.td @@ -112,4 +112,15 @@ def GenericLoopConversionPass ]; } +def AutomapToTargetDataPass + : Pass<"omp-automap-to-target-data", "::mlir::ModuleOp"> { + let summary = "Insert OpenMP target data operations for AUTOMAP variables"; + let description = [{ + Inserts `omp.target_enter_data` and `omp.target_exit_data` operations to + map variables marked with the `AUTOMAP` modifier when their allocation + or deallocation is detected in the FIR. + }]; + let dependentDialects = ["mlir::omp::OpenMPDialect"]; +} + #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES diff --git a/flang/include/flang/Support/OpenMP-utils.h b/flang/include/flang/Support/OpenMP-utils.h index 6d9db2b682c5..a94640b91a04 100644 --- a/flang/include/flang/Support/OpenMP-utils.h +++ b/flang/include/flang/Support/OpenMP-utils.h @@ -9,6 +9,8 @@ #ifndef FORTRAN_SUPPORT_OPENMP_UTILS_H_ #define FORTRAN_SUPPORT_OPENMP_UTILS_H_ +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Semantics/symbol.h" #include "mlir/IR/Builders.h" @@ -72,6 +74,14 @@ struct EntryBlockArgs { /// \param [in] region - Empty region in which to create the entry block. mlir::Block *genEntryBlock( mlir::OpBuilder &builder, const EntryBlockArgs &args, mlir::Region ®ion); + +// Returns true if the variable has a dynamic size and therefore requires +// bounds operations to describe its extents. +bool needsBoundsOps(mlir::Value var); + +// Generate MapBoundsOp operations for the variable if required. +void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var, + llvm::SmallVectorImpl &boundsOps); } // namespace Fortran::common::openmp #endif // FORTRAN_SUPPORT_OPENMP_UTILS_H_ diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp new file mode 100644 index 000000000000..8eeff66921db --- /dev/null +++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp @@ -0,0 +1,130 @@ +//===- AutomapToTargetData.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/Dialect/Support/KindMapping.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Support/OpenMP-utils.h" + +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Operation.h" +#include "mlir/Pass/Pass.h" + +#include "llvm/Frontend/OpenMP/OMPConstants.h" + +namespace flangomp { +#define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +using namespace mlir; +using namespace Fortran::common::openmp; + +namespace { +class AutomapToTargetDataPass + : public flangomp::impl::AutomapToTargetDataPassBase< + AutomapToTargetDataPass> { + void findRelatedAllocmemFreemem(fir::AddrOfOp addressOfOp, + llvm::DenseSet &allocmems, + llvm::DenseSet &freemems) { + assert(addressOfOp->hasOneUse() && "op must have single use"); + + auto declaredRef = + cast(*addressOfOp->getUsers().begin())->getResult(0); + + for (Operation *refUser : declaredRef.getUsers()) { + if (auto storeOp = dyn_cast(refUser)) + if (auto emboxOp = storeOp.getValue().getDefiningOp()) + if (auto allocmemOp = + emboxOp.getOperand(0).getDefiningOp()) + allocmems.insert(storeOp); + + if (auto loadOp = dyn_cast(refUser)) + for (Operation *loadUser : loadOp.getResult().getUsers()) + if (auto boxAddrOp = dyn_cast(loadUser)) + for (Operation *boxAddrUser : boxAddrOp.getResult().getUsers()) + if (auto freememOp = dyn_cast(boxAddrUser)) + freemems.insert(loadOp); + } + } + + void runOnOperation() override { + ModuleOp module = getOperation()->getParentOfType(); + if (!module) + module = dyn_cast(getOperation()); + if (!module) + return; + + // Build FIR builder for helper utilities. + fir::KindMapping kindMap = fir::getKindMapping(module); + fir::FirOpBuilder builder{module, std::move(kindMap)}; + + // Collect global variables with AUTOMAP flag. + llvm::DenseSet automapGlobals; + module.walk([&](fir::GlobalOp globalOp) { + if (auto iface = + dyn_cast(globalOp.getOperation())) + if (iface.isDeclareTarget() && iface.getDeclareTargetAutomap() && + iface.getDeclareTargetDeviceType() != + omp::DeclareTargetDeviceType::host) + automapGlobals.insert(globalOp); + }); + + auto addMapInfo = [&](auto globalOp, auto memOp) { + builder.setInsertionPointAfter(memOp); + SmallVector bounds; + if (needsBoundsOps(memOp.getMemref())) + genBoundsOps(builder, memOp.getMemref(), bounds); + + omp::TargetEnterExitUpdateDataOperands clauses; + mlir::omp::MapInfoOp mapInfo = mlir::omp::MapInfoOp::create( + builder, memOp.getLoc(), memOp.getMemref().getType(), + memOp.getMemref(), + TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())), + builder.getIntegerAttr( + builder.getIntegerType(64, false), + static_cast( + isa(memOp) + ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO + : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE)), + builder.getAttr( + omp::VariableCaptureKind::ByCopy), + /*var_ptr_ptr=*/mlir::Value{}, + /*members=*/SmallVector{}, + /*members_index=*/ArrayAttr{}, bounds, + /*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(), + builder.getBoolAttr(false)); + clauses.mapVars.push_back(mapInfo); + isa(memOp) + ? builder.create(memOp.getLoc(), clauses) + : builder.create(memOp.getLoc(), clauses); + }; + + for (fir::GlobalOp globalOp : automapGlobals) { + if (auto uses = globalOp.getSymbolUses(module.getOperation())) { + llvm::DenseSet allocmemStores; + llvm::DenseSet freememLoads; + for (auto &x : *uses) + if (auto addrOp = dyn_cast(x.getUser())) + findRelatedAllocmemFreemem(addrOp, allocmemStores, freememLoads); + + for (auto storeOp : allocmemStores) + addMapInfo(globalOp, storeOp); + + for (auto loadOp : freememLoads) + addMapInfo(globalOp, loadOp); + } + } + } +}; +} // namespace diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt index e31543328a9f..afe90985e54f 100644 --- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt +++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt @@ -1,6 +1,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_flang_library(FlangOpenMPTransforms + AutomapToTargetData.cpp DoConcurrentConversion.cpp FunctionFiltering.cpp GenericLoopConversion.cpp diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp index 970f7d7ab063..184f991fd91e 100644 --- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp +++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp @@ -29,6 +29,7 @@ #include "flang/Optimizer/Dialect/Support/KindMapping.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/OpenMP/Passes.h" +#include "flang/Support/OpenMP-utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" @@ -47,6 +48,7 @@ namespace flangomp { } // namespace flangomp using namespace mlir; +using namespace Fortran::common::openmp; namespace { class MapsForPrivatizedSymbolsPass @@ -193,38 +195,5 @@ class MapsForPrivatizedSymbolsPass } } } - // As the name suggests, this function examines var to determine if - // it has dynamic size. If true, this pass'll have to extract these - // bounds from descriptor of var and add the bounds to the resultant - // MapInfoOp. - bool needsBoundsOps(mlir::Value var) { - assert(mlir::isa(var.getType()) && - "needsBoundsOps can deal only with pointer types"); - mlir::Type t = fir::unwrapRefType(var.getType()); - // t could be a box, so look inside the box - auto innerType = fir::dyn_cast_ptrOrBoxEleTy(t); - if (innerType) - return fir::hasDynamicSize(innerType); - return fir::hasDynamicSize(t); - } - - void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var, - llvm::SmallVector &boundsOps) { - mlir::Location loc = var.getLoc(); - fir::factory::AddrAndBoundsInfo info = - fir::factory::getDataOperandBaseAddr(builder, var, - /*isOptional=*/false, loc); - fir::ExtendedValue extendedValue = - hlfir::translateToExtendedValue(loc, builder, hlfir::Entity{info.addr}, - /*continguousHint=*/true) - .first; - llvm::SmallVector boundsOpsVec = - fir::factory::genImplicitBoundsOps( - builder, info, extendedValue, - /*dataExvIsAssumedSize=*/false, loc); - for (auto bounds : boundsOpsVec) - boundsOps.push_back(bounds); - } }; } // namespace diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp index ca8e82060868..9f02d2df43e6 100644 --- a/flang/lib/Optimizer/Passes/Pipelines.cpp +++ b/flang/lib/Optimizer/Passes/Pipelines.cpp @@ -316,13 +316,13 @@ void createOpenMPFIRPassPipeline(mlir::PassManager &pm, pm.addPass(flangomp::createDoConcurrentConversionPass( opts.doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device)); - // The MapsForPrivatizedSymbols pass needs to run before - // MapInfoFinalizationPass because the former creates new - // MapInfoOp instances, typically for descriptors. - // MapInfoFinalizationPass adds MapInfoOp instances for the descriptors - // underlying data which is necessary to access the data on the offload - // target device. + // The MapsForPrivatizedSymbols and AutomapToTargetDataPass pass need to run + // before MapInfoFinalizationPass because they create new MapInfoOp + // instances, typically for descriptors. MapInfoFinalizationPass adds + // MapInfoOp instances for the descriptors underlying data which is necessary + // to access the data on the offload target device. pm.addPass(flangomp::createMapsForPrivatizedSymbolsPass()); + pm.addPass(flangomp::createAutomapToTargetDataPass()); pm.addPass(flangomp::createMapInfoFinalizationPass()); pm.addPass(flangomp::createMarkDeclareTargetPass()); pm.addPass(flangomp::createGenericLoopConversionPass()); diff --git a/flang/lib/Support/OpenMP-utils.cpp b/flang/lib/Support/OpenMP-utils.cpp index 97e7723f0be8..c694639bef27 100644 --- a/flang/lib/Support/OpenMP-utils.cpp +++ b/flang/lib/Support/OpenMP-utils.cpp @@ -7,7 +7,10 @@ //===----------------------------------------------------------------------===// #include "flang/Support/OpenMP-utils.h" +#include "flang/Optimizer/Builder/DirectivesCommon.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/OpDefinition.h" namespace Fortran::common::openmp { @@ -47,4 +50,30 @@ mlir::Block *genEntryBlock(mlir::OpBuilder &builder, const EntryBlockArgs &args, return builder.createBlock(®ion, {}, types, locs); } + +bool needsBoundsOps(mlir::Value var) { + assert(mlir::isa(var.getType()) && + "only pointer like types expected"); + mlir::Type t = fir::unwrapRefType(var.getType()); + if (mlir::Type inner = fir::dyn_cast_ptrOrBoxEleTy(t)) + return fir::hasDynamicSize(inner); + return fir::hasDynamicSize(t); +} + +void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var, + llvm::SmallVectorImpl &boundsOps) { + mlir::Location loc = var.getLoc(); + fir::factory::AddrAndBoundsInfo info = + fir::factory::getDataOperandBaseAddr(builder, var, + /*isOptional=*/false, loc); + fir::ExtendedValue exv = + hlfir::translateToExtendedValue(loc, builder, hlfir::Entity{info.addr}, + /*contiguousHint=*/true) + .first; + llvm::SmallVector tmp = + fir::factory::genImplicitBoundsOps( + builder, info, exv, /*dataExvIsAssumedSize=*/false, loc); + llvm::append_range(boundsOps, tmp); +} } // namespace Fortran::common::openmp diff --git a/flang/test/Transforms/omp-automap-to-target-data.fir b/flang/test/Transforms/omp-automap-to-target-data.fir new file mode 100644 index 000000000000..7a19705a248b --- /dev/null +++ b/flang/test/Transforms/omp-automap-to-target-data.fir @@ -0,0 +1,58 @@ +// RUN: fir-opt --omp-automap-to-target-data %s | FileCheck %s +// Test OMP AutomapToTargetData pass. + +module { + fir.global + @_QMtestEarr{omp.declare_target = #omp.declaretarget} target + : !fir.box>> + + func.func @automap() { + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : i32 + %addr = fir.address_of(@_QMtestEarr) : !fir.ref>>> + %decl:2 = hlfir.declare %addr {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtestEarr"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %idx = fir.convert %c10 : (i32) -> index + %cond = arith.cmpi sgt, %idx, %c0 : index + %n = arith.select %cond, %idx, %c0 : index + %mem = fir.allocmem !fir.array, %n {fir.must_be_heap = true} + %shape = fir.shape %n : (index) -> !fir.shape<1> + %box = fir.embox %mem(%shape) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + fir.store %box to %decl#0 : !fir.ref>>> + %ld = fir.load %decl#0 : !fir.ref>>> + %base = fir.box_addr %ld : (!fir.box>>) -> !fir.heap> + fir.freemem %base : !fir.heap> + %undef = fir.zero_bits !fir.heap> + %sh0 = fir.shape %c0 : (index) -> !fir.shape<1> + %empty = fir.embox %undef(%sh0) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + fir.store %empty to %decl#0 : !fir.ref>>> + return + } +} + +// CHECK: fir.global @[[AUTOMAP:.*]] {{{.*}} automap = true +// CHECK-LABEL: func.func @automap() +// CHECK: %[[AUTOMAP_ADDR:.*]] = fir.address_of(@[[AUTOMAP]]) +// CHECK: %[[AUTOMAP_DECL:.*]]:2 = hlfir.declare %[[AUTOMAP_ADDR]] +// CHECK: %[[ALLOC_MEM:.*]] = fir.allocmem +// CHECK-NEXT: fir.shape +// CHECK-NEXT: %[[ARR_BOXED:.*]] = fir.embox %[[ALLOC_MEM]] +// CHECK-NEXT: fir.store %[[ARR_BOXED]] +// CHECK-NEXT: %[[ARR_BOXED_LOADED:.*]] = fir.load %[[AUTOMAP_DECL]]#0 +// CHECK-NEXT: %[[ARR_HEAP_PTR:.*]] = fir.box_addr %[[ARR_BOXED_LOADED]] +// CHECK-NEXT: %[[DIM0:.*]] = arith.constant 0 : index +// CHECK-NEXT: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[DIM0]] +// CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : index +// CHECK-NEXT: %[[ZERO:.*]] = arith.constant 0 : index +// CHECK-NEXT: %[[BOX_DIMS2:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[ZERO]] +// CHECK-NEXT: %[[LOWER_BOUND:.*]] = arith.constant 0 : index +// CHECK-NEXT: %[[UPPER_BOUND:.*]] = arith.subi %[[BOX_DIMS2]]#1, %[[ONE]] : index +// CHECK-NEXT: omp.map.bounds lower_bound(%[[LOWER_BOUND]] : index) upper_bound(%[[UPPER_BOUND]] : index) extent(%[[BOX_DIMS2]]#1 : index) stride(%[[BOX_DIMS2]]#2 : index) start_idx(%[[BOX_DIMS]]#0 : index) {stride_in_bytes = true} +// CHECK-NEXT: arith.muli %[[BOX_DIMS2]]#2, %[[BOX_DIMS2]]#1 : index +// CHECK-NEXT: %[[MAP_INFO:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(to) capture(ByCopy) +// CHECK-NEXT: omp.target_enter_data map_entries(%[[MAP_INFO]] +// CHECK: %[[LOAD:.*]] = fir.load %[[AUTOMAP_DECL]]#0 +// CHECK: %[[EXIT_MAP:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(delete) capture(ByCopy) +// CHECK-NEXT: omp.target_exit_data map_entries(%[[EXIT_MAP]] +// CHECK-NEXT: %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]] +// CHECK-NEXT: fir.freemem %[[BOXADDR]] diff --git a/offload/test/offloading/fortran/declare-target-automap.f90 b/offload/test/offloading/fortran/declare-target-automap.f90 new file mode 100644 index 000000000000..50e8c124c25f --- /dev/null +++ b/offload/test/offloading/fortran/declare-target-automap.f90 @@ -0,0 +1,36 @@ +!Offloading test for AUTOMAP modifier in declare target enter +! REQUIRES: flang, amdgpu + +program automap_program + use iso_c_binding, only: c_loc + use omp_lib, only: omp_get_default_device, omp_target_is_present + integer, parameter :: N = 10 + integer :: i + integer, allocatable, target :: automap_array(:) + !$omp declare target enter(automap:automap_array) + + ! false since the storage is not present even though the descriptor is present + write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device()) + ! CHECK: 0 + + allocate (automap_array(N)) + ! true since the storage should be allocated and reference count incremented by the allocate + write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device()) + ! CHECK: 1 + + ! since storage is present this should not be a runtime error + !$omp target teams loop + do i = 1, N + automap_array(i) = i + end do + + !$omp target update from(automap_array) + write (*, *) automap_array + ! CHECK: 1 2 3 4 5 6 7 8 9 10 + + deallocate (automap_array) + + ! automap_array should have it's storage unmapped on device here + write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device()) + ! CHECK: 0 +end program