From 9b195dc3ef66de2c1ff0048822b24a322ec3c52a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 4 Aug 2025 16:51:11 -0700 Subject: [PATCH] [flang][cuda] Generate cuf.allocate for descriptor with CUDA components (#152041) Descriptors for derived types with CUDA components are allocated in managed memory. For an allocate statement on such an object, the lowering was calling the standard runtime where it should generate a `cuf.allocate` operation. --- flang/include/flang/Semantics/tools.h | 2 ++ flang/lib/Lower/Allocatable.cpp | 8 +++++--- flang/lib/Lower/ConvertVariable.cpp | 6 ++++-- flang/lib/Semantics/tools.cpp | 15 +++++++++++++++ flang/test/Lower/CUDA/cuda-allocatable.cuf | 13 +++++++++++++ 5 files changed, 39 insertions(+), 5 deletions(-) diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 317b9357b4c1..966a30f7081f 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -223,6 +223,8 @@ inline bool HasCUDAAttr(const Symbol &sym) { return false; } +bool HasCUDAComponent(const Symbol &sym); + inline bool IsCUDAShared(const Symbol &sym) { if (const auto *details{sym.GetUltimate().detailsIf()}) { if (details->cudaDataAttr() && diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index ef16b0cd4c0f..219f9205f45d 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -466,7 +466,9 @@ private: void genSimpleAllocation(const Allocation &alloc, const fir::MutableBoxValue &box) { - bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol()); + bool isCudaAllocate = + Fortran::semantics::HasCUDAAttr(alloc.getSymbol()) || + Fortran::semantics::HasCUDAComponent(alloc.getSymbol()); bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion()); bool inlineAllocation = !box.isDerived() && 
!errorManager.hasStatSpec() && !alloc.type.IsPolymorphic() && @@ -475,7 +477,7 @@ private: unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol()); if (inlineAllocation && - ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) { + ((isCudaAllocate && isCudaDeviceContext) || !isCudaAllocate)) { // Pointers must use PointerAllocate so that their deallocations // can be validated. genInlinedAllocation(alloc, box); @@ -494,7 +496,7 @@ private: genSetDeferredLengthParameters(alloc, box); genAllocateObjectBounds(alloc, box); mlir::Value stat; - if (!isCudaSymbol) { + if (!isCudaAllocate) { stat = genRuntimeAllocate(builder, loc, box, errorManager); setPinnedToFalse(); } else { diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 88d17ac1ac78..a4a8a697e02a 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -814,8 +814,10 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter, baseTy = boxTy.getEleTy(); baseTy = fir::unwrapRefType(baseTy); - if (mlir::isa(baseTy)) - TODO(loc, "array of derived-type with device component"); + if (mlir::isa(baseTy) && + (fir::isAllocatableType(fir::getBase(exv).getType()) || + fir::isPointerType(fir::getBase(exv).getType()))) + return; // Allocator index need to be set after allocation. auto recTy = mlir::dyn_cast(fir::unwrapSequenceType(baseTy)); diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 5a5b02e1ac3c..913bf08cd0d9 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -1094,6 +1094,21 @@ bool IsDeviceAllocatable(const Symbol &symbol) { return false; } +bool HasCUDAComponent(const Symbol &symbol) { + if (const auto *details{symbol.GetUltimate() + .detailsIf()}) { + const Fortran::semantics::DeclTypeSpec *type{details->type()}; + const Fortran::semantics::DerivedTypeSpec *derived{ + type ? 
type->AsDerived() : nullptr}; + if (derived) { + if (FindCUDADeviceAllocatableUltimateComponent(*derived)) { + return true; + } + } + } + return false; +} + UltimateComponentIterator::const_iterator FindCUDADeviceAllocatableUltimateComponent(const DerivedTypeSpec &derived) { UltimateComponentIterator ultimates{derived}; diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf index 36e768bd7d92..2cf8c7d33681 100644 --- a/flang/test/Lower/CUDA/cuda-allocatable.cuf +++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf @@ -6,6 +6,10 @@ module globals real, device, allocatable :: a_device(:) real, managed, allocatable :: a_managed(:) real, pinned, allocatable :: a_pinned(:) + type :: t1 + integer :: a + real, dimension(:), allocatable, device :: b + end type end module ! CHECK-LABEL: fir.global @_QMglobalsEa_device {data_attr = #cuf.cuda} : !fir.box>> @@ -222,3 +226,12 @@ end ! CHECK: %[[FALSE:.*]] = arith.constant false ! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref> + +subroutine cuda_component() + use globals + type(t1), pointer, dimension(:) :: d + allocate(d(10)) +end subroutine + +! CHECK-LABEL: func.func @_QPcuda_component() +! CHECK: cuf.allocate