[flang][cuda] Generate cuf.allocate for descriptor with CUDA components (#152041)
Descriptors for derived types with CUDA components are allocated in managed memory. For an `allocate` statement on such an object, the lowering was calling the standard runtime where it should have generated a `cuf.allocate` operation.
This commit is contained in:
parent
951f40ac38
commit
9b195dc3ef
@ -223,6 +223,8 @@ inline bool HasCUDAAttr(const Symbol &sym) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool HasCUDAComponent(const Symbol &sym);
|
||||
|
||||
inline bool IsCUDAShared(const Symbol &sym) {
|
||||
if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) {
|
||||
if (details->cudaDataAttr() &&
|
||||
|
@ -466,7 +466,9 @@ private:
|
||||
|
||||
void genSimpleAllocation(const Allocation &alloc,
|
||||
const fir::MutableBoxValue &box) {
|
||||
bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol());
|
||||
bool isCudaAllocate =
|
||||
Fortran::semantics::HasCUDAAttr(alloc.getSymbol()) ||
|
||||
Fortran::semantics::HasCUDAComponent(alloc.getSymbol());
|
||||
bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
|
||||
bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() &&
|
||||
!alloc.type.IsPolymorphic() &&
|
||||
@ -475,7 +477,7 @@ private:
|
||||
unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol());
|
||||
|
||||
if (inlineAllocation &&
|
||||
((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
|
||||
((isCudaAllocate && isCudaDeviceContext) || !isCudaAllocate)) {
|
||||
// Pointers must use PointerAllocate so that their deallocations
|
||||
// can be validated.
|
||||
genInlinedAllocation(alloc, box);
|
||||
@ -494,7 +496,7 @@ private:
|
||||
genSetDeferredLengthParameters(alloc, box);
|
||||
genAllocateObjectBounds(alloc, box);
|
||||
mlir::Value stat;
|
||||
if (!isCudaSymbol) {
|
||||
if (!isCudaAllocate) {
|
||||
stat = genRuntimeAllocate(builder, loc, box, errorManager);
|
||||
setPinnedToFalse();
|
||||
} else {
|
||||
|
@ -814,8 +814,10 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
|
||||
baseTy = boxTy.getEleTy();
|
||||
baseTy = fir::unwrapRefType(baseTy);
|
||||
|
||||
if (mlir::isa<fir::SequenceType>(baseTy))
|
||||
TODO(loc, "array of derived-type with device component");
|
||||
if (mlir::isa<fir::SequenceType>(baseTy) &&
|
||||
(fir::isAllocatableType(fir::getBase(exv).getType()) ||
|
||||
fir::isPointerType(fir::getBase(exv).getType())))
|
||||
return; // Allocator index need to be set after allocation.
|
||||
|
||||
auto recTy =
|
||||
mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
|
||||
|
@ -1094,6 +1094,21 @@ bool IsDeviceAllocatable(const Symbol &symbol) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true when the ultimate symbol of `symbol` is an object entity whose
// declared type is a derived type for which
// FindCUDADeviceAllocatableUltimateComponent() finds a component.
// Returns false in every other case: the symbol is not an object entity, it
// has no declared type, the type is not a derived type, or no such component
// is found.
bool HasCUDAComponent(const Symbol &symbol) {
|
||||
if (const auto *details{symbol.GetUltimate()
|
||||
.detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
|
||||
const Fortran::semantics::DeclTypeSpec *type{details->type()};
|
||||
// `derived` stays null when there is no declared type or when AsDerived()
// returns null for it.
const Fortran::semantics::DerivedTypeSpec *derived{
|
||||
type ? type->AsDerived() : nullptr};
|
||||
if (derived) {
|
||||
if (FindCUDADeviceAllocatableUltimateComponent(*derived)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
UltimateComponentIterator::const_iterator
|
||||
FindCUDADeviceAllocatableUltimateComponent(const DerivedTypeSpec &derived) {
|
||||
UltimateComponentIterator ultimates{derived};
|
||||
|
@ -6,6 +6,10 @@ module globals
|
||||
real, device, allocatable :: a_device(:)
|
||||
real, managed, allocatable :: a_managed(:)
|
||||
real, pinned, allocatable :: a_pinned(:)
|
||||
type :: t1
|
||||
integer :: a
|
||||
real, dimension(:), allocatable, device :: b
|
||||
end type
|
||||
end module
|
||||
|
||||
! CHECK-LABEL: fir.global @_QMglobalsEa_device {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
|
||||
@ -222,3 +226,12 @@ end
|
||||
! CHECK: %[[FALSE:.*]] = arith.constant false
|
||||
! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
|
||||
! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>>
|
||||
|
||||
! Test fixture: declares a pointer array of type(t1) from module globals and
! allocates 10 elements. t1 has an allocatable, device component (b). The
! FileCheck expectations that follow this subroutine require a cuf.allocate
! operation in the function lowered from it.
subroutine cuda_component()
|
||||
use globals
|
||||
type(t1), pointer, dimension(:) :: d
|
||||
allocate(d(10))
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPcuda_component()
|
||||
! CHECK: cuf.allocate
|
||||
|
Loading…
x
Reference in New Issue
Block a user