[flang][cuda] Add operation to sync global descriptor (#121520)
Introduce `cuf.sync_descriptor`, an operation used to synchronize the descriptor of a device global after pointer association. Also move CUFCommon so that it can be used in the FIRBuilder library as well.
This commit is contained in:
parent
4010e0c45b
commit
4b17a8b10e
@ -140,6 +140,17 @@ def cuf_DeallocateOp : cuf_Op<"deallocate",
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
// Operation that synchronizes the host and device descriptor of a Fortran
// pointer. Its single argument is a symbol reference attribute, `globalName`.
// The textual form is `cuf.sync_descriptor @symbol`, optionally followed by
// an attribute dictionary.
def cuf_SyncDescriptorOp : cuf_Op<"sync_descriptor", []> {
  let summary = "Synchronize the host and device descriptor of a Fortran pointer";
  let arguments = (ins SymbolRefAttr:$globalName);
  let assemblyFormat = "$globalName attr-dict";
}
|
||||
|
||||
def cuf_DataTransferOp : cuf_Op<"data_transfer", []> {
|
||||
let summary = "Represent a data transfer between host and device memory";
|
||||
|
||||
|
@ -22,12 +22,14 @@
|
||||
#include "flang/Lower/PFTBuilder.h"
|
||||
#include "flang/Lower/Runtime.h"
|
||||
#include "flang/Lower/StatementContext.h"
|
||||
#include "flang/Optimizer/Builder/CUFCommon.h"
|
||||
#include "flang/Optimizer/Builder/FIRBuilder.h"
|
||||
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
|
||||
#include "flang/Optimizer/Builder/Todo.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
||||
#include "flang/Optimizer/Dialect/FIROps.h"
|
||||
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
|
||||
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
||||
#include "flang/Optimizer/Support/FatalError.h"
|
||||
#include "flang/Optimizer/Support/InternalNames.h"
|
||||
#include "flang/Parser/parse-tree.h"
|
||||
@ -1086,6 +1088,22 @@ bool Fortran::lower::isArraySectionWithoutVectorSubscript(
|
||||
!Fortran::evaluate::HasVectorSubscript(expr);
|
||||
}
|
||||
|
||||
/// Create a cuf::SyncDescriptorOp for \p box when it refers to a global that
/// cuf::isRegisteredDeviceGlobal accepts.
///
/// The following chain must hold, otherwise nothing is emitted:
///   - \p box is defined by an hlfir::DeclareOp;
///   - the memref of that declare is defined by a fir::AddrOfOp;
///   - the symbol of that address-of resolves to a fir::GlobalOp in the
///     enclosing mlir::ModuleOp;
///   - cuf::isRegisteredDeviceGlobal returns true for that global.
///
/// \param box     value whose defining operations are inspected; its location
///                is used for the created operation.
/// \param builder builder used to create the operation at its current
///                insertion point.
static void genCUFPointerSync(const mlir::Value box,
                              fir::FirOpBuilder &builder) {
  auto declare = box.getDefiningOp<hlfir::DeclareOp>();
  if (!declare)
    return;

  auto addrOf = declare.getMemref().getDefiningOp<fir::AddrOfOp>();
  if (!addrOf)
    return;

  // Resolve the referenced symbol in the module that contains the address-of.
  auto globalSym = addrOf.getSymbol();
  auto parentModule = addrOf->getParentOfType<mlir::ModuleOp>();
  auto global = parentModule.lookupSymbol<fir::GlobalOp>(globalSym);
  if (!global)
    return;

  if (!cuf::isRegisteredDeviceGlobal(global))
    return;

  builder.create<cuf::SyncDescriptorOp>(box.getLoc(), globalSym);
}
|
||||
|
||||
void Fortran::lower::associateMutableBox(
|
||||
Fortran::lower::AbstractConverter &converter, mlir::Location loc,
|
||||
const fir::MutableBoxValue &box, const Fortran::lower::SomeExpr &source,
|
||||
@ -1098,6 +1116,7 @@ void Fortran::lower::associateMutableBox(
|
||||
if (converter.getLoweringOptions().getLowerToHighLevelFIR()) {
|
||||
fir::ExtendedValue rhs = converter.genExprAddr(loc, source, stmtCtx);
|
||||
fir::factory::associateMutableBox(builder, loc, box, rhs, lbounds);
|
||||
genCUFPointerSync(box.getAddr(), builder);
|
||||
return;
|
||||
}
|
||||
// The right hand side is not to be evaluated into a temp. Array sections can
|
||||
|
@ -5,6 +5,7 @@ add_flang_library(FIRBuilder
|
||||
BoxValue.cpp
|
||||
Character.cpp
|
||||
Complex.cpp
|
||||
CUFCommon.cpp
|
||||
DoLoopHelper.cpp
|
||||
FIRBuilder.cpp
|
||||
HLFIRTools.cpp
|
||||
|
@ -6,7 +6,7 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "flang/Optimizer/Transforms/CUFCommon.h"
|
||||
#include "flang/Optimizer/Builder/CUFCommon.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
||||
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
||||
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
|
@ -9,7 +9,6 @@ add_flang_library(FIRTransforms
|
||||
CompilerGeneratedNames.cpp
|
||||
ConstantArgumentGlobalisation.cpp
|
||||
ControlFlowConverter.cpp
|
||||
CUFCommon.cpp
|
||||
CUFAddConstructor.cpp
|
||||
CUFDeviceGlobal.cpp
|
||||
CUFOpConversion.cpp
|
||||
|
@ -7,6 +7,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "flang/Optimizer/Builder/BoxValue.h"
|
||||
#include "flang/Optimizer/Builder/CUFCommon.h"
|
||||
#include "flang/Optimizer/Builder/FIRBuilder.h"
|
||||
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
|
||||
#include "flang/Optimizer/Builder/Todo.h"
|
||||
@ -19,7 +20,6 @@
|
||||
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
|
||||
#include "flang/Optimizer/Dialect/FIRType.h"
|
||||
#include "flang/Optimizer/Support/DataLayout.h"
|
||||
#include "flang/Optimizer/Transforms/CUFCommon.h"
|
||||
#include "flang/Runtime/CUDA/registration.h"
|
||||
#include "flang/Runtime/entry-names.h"
|
||||
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
|
||||
|
@ -7,12 +7,12 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "flang/Common/Fortran.h"
|
||||
#include "flang/Optimizer/Builder/CUFCommon.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
||||
#include "flang/Optimizer/Dialect/FIRDialect.h"
|
||||
#include "flang/Optimizer/Dialect/FIROps.h"
|
||||
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
||||
#include "flang/Optimizer/Support/InternalNames.h"
|
||||
#include "flang/Optimizer/Transforms/CUFCommon.h"
|
||||
#include "flang/Runtime/CUDA/common.h"
|
||||
#include "flang/Runtime/allocatable.h"
|
||||
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include "flang/Optimizer/Transforms/CUFOpConversion.h"
|
||||
#include "flang/Common/Fortran.h"
|
||||
#include "flang/Optimizer/Builder/CUFCommon.h"
|
||||
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
|
||||
#include "flang/Optimizer/CodeGen/TypeConverter.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
||||
@ -15,7 +16,6 @@
|
||||
#include "flang/Optimizer/Dialect/FIROps.h"
|
||||
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
||||
#include "flang/Optimizer/Support/DataLayout.h"
|
||||
#include "flang/Optimizer/Transforms/CUFCommon.h"
|
||||
#include "flang/Runtime/CUDA/allocatable.h"
|
||||
#include "flang/Runtime/CUDA/common.h"
|
||||
#include "flang/Runtime/CUDA/descriptor.h"
|
||||
|
@ -24,6 +24,7 @@
|
||||
|
||||
#include "flang/Common/Fortran.h"
|
||||
#include "flang/Optimizer/Builder/BoxValue.h"
|
||||
#include "flang/Optimizer/Builder/CUFCommon.h"
|
||||
#include "flang/Optimizer/Builder/FIRBuilder.h"
|
||||
#include "flang/Optimizer/Builder/LowLevelIntrinsics.h"
|
||||
#include "flang/Optimizer/Builder/Todo.h"
|
||||
@ -31,7 +32,6 @@
|
||||
#include "flang/Optimizer/Dialect/FIRType.h"
|
||||
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
|
||||
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
|
||||
#include "flang/Optimizer/Transforms/CUFCommon.h"
|
||||
#include "flang/Optimizer/Transforms/Passes.h"
|
||||
#include "flang/Optimizer/Transforms/Utils.h"
|
||||
#include "flang/Runtime/entry-names.h"
|
||||
|
17
flang/test/Lower/CUDA/cuda-pointer-sync.cuf
Normal file
17
flang/test/Lower/CUDA/cuda-pointer-sync.cuf
Normal file
@ -0,0 +1,17 @@
|
||||
! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
|
||||
|
||||
! Module holding a device pointer declared at module scope. The expected
! output of this test shows it lowered to a fir.global that carries the
! cuf device data attribute.
module devptr
|
||||
real, device, pointer, dimension(:) :: dev_ptr
|
||||
end module
|
||||
|
||||
! Main program: associates the module-level device pointer with a local
! device target.
use devptr
|
||||
real, device, target, dimension(4) :: a_dev
|
||||
a_dev = 42.0
|
||||
! Pointer association under test: the expected output has an embox and a
! store followed by cuf.sync_descriptor on the module global.
dev_ptr => a_dev
|
||||
end
|
||||
|
||||
! CHECK: fir.global @_QMdevptrEdev_ptr {data_attr = #cuf.cuda<device>} : !fir.box<!fir.ptr<!fir.array<?xf32>>>
|
||||
! CHECK-LABEL: func.func @_QQmain()
|
||||
! CHECK: fir.embox
|
||||
! CHECK: fir.store
|
||||
! CHECK: cuf.sync_descriptor @_QMdevptrEdev_ptr
|
Loading…
x
Reference in New Issue
Block a user