[flang][cuda] Set the allocator of derived type component after allocation (#152379)

- Move the allocator index set up after the allocate statement otherwise
the derived type descriptor is not allocated.
- Support array of derived-type with device component
This commit is contained in:
Valentin Clement (バレンタイン クレメン) 2025-08-06 15:14:00 -07:00 committed by GitHub
parent 885ddf4a3a
commit d897355876
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 205 additions and 89 deletions

View File

@ -1,4 +1,4 @@
//===-- Lower/Cuda.h -- Cuda Fortran utilities ------------------*- C++ -*-===//
//===-- Lower/CUDA.h -- CUDA Fortran utilities ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@ -15,6 +15,7 @@
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Runtime/allocator-registry-consts.h"
#include "flang/Semantics/tools.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
@ -37,6 +38,15 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
return kDefaultAllocator;
}
void initializeDeviceComponentAllocator(
Fortran::lower::AbstractConverter &converter,
const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box);
mlir::Type gatherDeviceComponentCoordinatesAndType(
fir::FirOpBuilder &builder, mlir::Location loc,
const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
llvm::SmallVector<mlir::Value> &coordinates);
} // end namespace Fortran::lower
#endif // FORTRAN_LOWER_CUDA_H

View File

@ -13,9 +13,9 @@
#include "flang/Lower/Allocatable.h"
#include "flang/Evaluate/tools.h"
#include "flang/Lower/AbstractConverter.h"
#include "flang/Lower/CUDA.h"
#include "flang/Lower/ConvertType.h"
#include "flang/Lower/ConvertVariable.h"
#include "flang/Lower/Cuda.h"
#include "flang/Lower/IterationSpace.h"
#include "flang/Lower/Mangler.h"
#include "flang/Lower/OpenACC.h"
@ -445,10 +445,14 @@ private:
/*mustBeHeap=*/true);
}
void postAllocationAction(const Allocation &alloc) {
void postAllocationAction(const Allocation &alloc,
const fir::MutableBoxValue &box) {
if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare))
Fortran::lower::attachDeclarePostAllocAction(converter, builder,
alloc.getSymbol());
if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol()))
Fortran::lower::initializeDeviceComponentAllocator(
converter, alloc.getSymbol(), box);
}
void setPinnedToFalse() {
@ -481,7 +485,7 @@ private:
// Pointers must use PointerAllocate so that their deallocations
// can be validated.
genInlinedAllocation(alloc, box);
postAllocationAction(alloc);
postAllocationAction(alloc, box);
setPinnedToFalse();
return;
}
@ -504,7 +508,7 @@ private:
genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol());
}
fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
postAllocationAction(alloc);
postAllocationAction(alloc, box);
errorManager.assignStat(builder, loc, stat);
}
@ -647,7 +651,7 @@ private:
setPinnedToFalse();
}
fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
postAllocationAction(alloc);
postAllocationAction(alloc, box);
errorManager.assignStat(builder, loc, stat);
}

View File

@ -13,6 +13,7 @@
#include "flang/Lower/Bridge.h"
#include "flang/Lower/Allocatable.h"
#include "flang/Lower/CUDA.h"
#include "flang/Lower/CallInterface.h"
#include "flang/Lower/Coarray.h"
#include "flang/Lower/ConvertCall.h"
@ -20,7 +21,6 @@
#include "flang/Lower/ConvertExprToHLFIR.h"
#include "flang/Lower/ConvertType.h"
#include "flang/Lower/ConvertVariable.h"
#include "flang/Lower/Cuda.h"
#include "flang/Lower/DirectivesCommon.h"
#include "flang/Lower/HostAssociations.h"
#include "flang/Lower/IO.h"

View File

@ -15,6 +15,7 @@ add_flang_library(FortranLower
ConvertProcedureDesignator.cpp
ConvertType.cpp
ConvertVariable.cpp
CUDA.cpp
CustomIntrinsicCall.cpp
HlfirIntrinsics.cpp
HostAssociations.cpp

148
flang/lib/Lower/CUDA.cpp Normal file
View File

@ -0,0 +1,148 @@
//===-- CUDA.cpp -- CUDA Fortran specific lowering ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//
#include "flang/Lower/CUDA.h"
#define DEBUG_TYPE "flang-lower-cuda"
void Fortran::lower::initializeDeviceComponentAllocator(
Fortran::lower::AbstractConverter &converter,
const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) {
if (const auto *details{
sym.GetUltimate()
.detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
const Fortran::semantics::DeclTypeSpec *type{details->type()};
const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived()
: nullptr};
if (derived) {
if (!FindCUDADeviceAllocatableUltimateComponent(*derived))
return; // No device components.
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
mlir::Location loc = converter.getCurrentLocation();
mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType());
// Only pointer and allocatable needs post allocation initialization
// of components descriptors.
if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy))
return;
// Extract the derived type.
mlir::Type ty = fir::getDerivedType(baseTy);
auto recTy = mlir::dyn_cast<fir::RecordType>(ty);
assert(recTy && "expected fir::RecordType");
if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy))
baseTy = boxTy.getEleTy();
baseTy = fir::unwrapRefType(baseTy);
Fortran::semantics::UltimateComponentIterator components{*derived};
mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr());
mlir::Value addr;
if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy)) {
mlir::Type idxTy = builder.getIndexType();
mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
llvm::SmallVector<fir::DoLoopOp> loops;
llvm::SmallVector<mlir::Value> indices;
llvm::SmallVector<mlir::Value> extents;
for (unsigned i = 0; i < seqTy.getDimension(); ++i) {
mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy,
idxTy, loadedBox, dim);
mlir::Value lbub = mlir::arith::AddIOp::create(
builder, loc, dimInfo.getResult(0), dimInfo.getResult(1));
mlir::Value ext =
mlir::arith::SubIOp::create(builder, loc, lbub, one);
mlir::Value cmp = mlir::arith::CmpIOp::create(
builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero);
ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero);
extents.push_back(ext);
auto loop = fir::DoLoopOp::create(
builder, loc, dimInfo.getResult(0), dimInfo.getResult(1),
dimInfo.getResult(2), /*isUnordered=*/true,
/*finalCount=*/false, mlir::ValueRange{});
loops.push_back(loop);
indices.push_back(loop.getInductionVar());
builder.setInsertionPointToStart(loop.getBody());
}
mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox);
auto shape = fir::ShapeOp::create(builder, loc, extents);
addr = fir::ArrayCoorOp::create(
builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape,
/*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{});
} else {
addr = fir::BoxAddrOp::create(builder, loc, loadedBox);
}
for (const auto &compSym : components) {
if (Fortran::semantics::IsDeviceAllocatable(compSym)) {
llvm::SmallVector<mlir::Value> coord;
mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType(
builder, loc, compSym, recTy, coord);
assert(coord.size() == 1 && "expect one coordinate");
mlir::Value comp = fir::CoordinateOp::create(
builder, loc, builder.getRefType(fieldTy), addr, coord[0]);
cuf::DataAttributeAttr dataAttr =
Fortran::lower::translateSymbolCUFDataAttribute(
builder.getContext(), compSym);
cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr);
}
}
}
}
}
mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType(
fir::FirOpBuilder &builder, mlir::Location loc,
const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
llvm::SmallVector<mlir::Value> &coordinates) {
unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString());
mlir::Type fieldTy;
if (fieldIdx != std::numeric_limits<unsigned>::max()) {
// Field found in the base record type.
auto fieldName = recTy.getTypeList()[fieldIdx].first;
fieldTy = recTy.getTypeList()[fieldIdx].second;
mlir::Value fieldIndex = fir::FieldIndexOp::create(
builder, loc, fir::FieldType::get(fieldTy.getContext()), fieldName,
recTy,
/*typeParams=*/mlir::ValueRange{});
coordinates.push_back(fieldIndex);
} else {
// Field not found in base record type, search in potential
// record type components.
for (auto component : recTy.getTypeList()) {
if (auto childRecTy = mlir::dyn_cast<fir::RecordType>(component.second)) {
fieldIdx = childRecTy.getFieldIndex(sym.name().ToString());
if (fieldIdx != std::numeric_limits<unsigned>::max()) {
mlir::Value parentFieldIndex = fir::FieldIndexOp::create(
builder, loc, fir::FieldType::get(childRecTy.getContext()),
component.first, recTy,
/*typeParams=*/mlir::ValueRange{});
coordinates.push_back(parentFieldIndex);
auto fieldName = childRecTy.getTypeList()[fieldIdx].first;
fieldTy = childRecTy.getTypeList()[fieldIdx].second;
mlir::Value childFieldIndex = fir::FieldIndexOp::create(
builder, loc, fir::FieldType::get(fieldTy.getContext()),
fieldName, childRecTy,
/*typeParams=*/mlir::ValueRange{});
coordinates.push_back(childFieldIndex);
break;
}
}
}
}
if (coordinates.empty())
TODO(loc, "device resident component in complex derived-type hierarchy");
return fieldTy;
}

View File

@ -14,12 +14,12 @@
#include "flang/Lower/AbstractConverter.h"
#include "flang/Lower/Allocatable.h"
#include "flang/Lower/BoxAnalyzer.h"
#include "flang/Lower/CUDA.h"
#include "flang/Lower/CallInterface.h"
#include "flang/Lower/ConvertConstant.h"
#include "flang/Lower/ConvertExpr.h"
#include "flang/Lower/ConvertExprToHLFIR.h"
#include "flang/Lower/ConvertProcedureDesignator.h"
#include "flang/Lower/Cuda.h"
#include "flang/Lower/Mangler.h"
#include "flang/Lower/PFTBuilder.h"
#include "flang/Lower/StatementContext.h"
@ -814,81 +814,24 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
baseTy = boxTy.getEleTy();
baseTy = fir::unwrapRefType(baseTy);
if (mlir::isa<fir::SequenceType>(baseTy) &&
(fir::isAllocatableType(fir::getBase(exv).getType()) ||
fir::isPointerType(fir::getBase(exv).getType())))
if (fir::isAllocatableType(fir::getBase(exv).getType()) ||
fir::isPointerType(fir::getBase(exv).getType()))
return; // Allocator index need to be set after allocation.
auto recTy =
mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
assert(recTy && "expected fir::RecordType");
llvm::SmallVector<mlir::Value> coordinates;
Fortran::semantics::UltimateComponentIterator components{*derived};
for (const auto &sym : components) {
if (Fortran::semantics::IsDeviceAllocatable(sym)) {
unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString());
mlir::Type fieldTy;
llvm::SmallVector<mlir::Value> coordinates;
if (fieldIdx != std::numeric_limits<unsigned>::max()) {
// Field found in the base record type.
auto fieldName = recTy.getTypeList()[fieldIdx].first;
fieldTy = recTy.getTypeList()[fieldIdx].second;
mlir::Value fieldIndex = fir::FieldIndexOp::create(
builder, loc, fir::FieldType::get(fieldTy.getContext()),
fieldName, recTy,
/*typeParams=*/mlir::ValueRange{});
coordinates.push_back(fieldIndex);
} else {
// Field not found in base record type, search in potential
// record type components.
for (auto component : recTy.getTypeList()) {
if (auto childRecTy =
mlir::dyn_cast<fir::RecordType>(component.second)) {
fieldIdx = childRecTy.getFieldIndex(sym.name().ToString());
if (fieldIdx != std::numeric_limits<unsigned>::max()) {
mlir::Value parentFieldIndex = fir::FieldIndexOp::create(
builder, loc,
fir::FieldType::get(childRecTy.getContext()),
component.first, recTy,
/*typeParams=*/mlir::ValueRange{});
coordinates.push_back(parentFieldIndex);
auto fieldName = childRecTy.getTypeList()[fieldIdx].first;
fieldTy = childRecTy.getTypeList()[fieldIdx].second;
mlir::Value childFieldIndex = fir::FieldIndexOp::create(
builder, loc, fir::FieldType::get(fieldTy.getContext()),
fieldName, childRecTy,
/*typeParams=*/mlir::ValueRange{});
coordinates.push_back(childFieldIndex);
break;
}
}
}
}
if (coordinates.empty())
TODO(loc, "device resident component in complex derived-type "
"hierarchy");
llvm::SmallVector<mlir::Value> coord;
mlir::Type fieldTy =
Fortran::lower::gatherDeviceComponentCoordinatesAndType(
builder, loc, sym, recTy, coord);
mlir::Value base = fir::getBase(exv);
mlir::Value comp;
if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(base.getType()))) {
mlir::Value box = fir::LoadOp::create(builder, loc, base);
mlir::Value addr = fir::BoxAddrOp::create(builder, loc, box);
llvm::SmallVector<mlir::Value> lenParams;
assert(coordinates.size() == 1 && "expect one coordinate");
auto field = mlir::dyn_cast<fir::FieldIndexOp>(
coordinates[0].getDefiningOp());
comp = hlfir::DesignateOp::create(
builder, loc, builder.getRefType(fieldTy), addr,
/*component=*/field.getFieldName(),
/*componentShape=*/mlir::Value{},
hlfir::DesignateOp::Subscripts{});
} else {
comp = fir::CoordinateOp::create(
builder, loc, builder.getRefType(fieldTy), base, coordinates);
}
mlir::Value comp = fir::CoordinateOp::create(
builder, loc, builder.getRefType(fieldTy), base, coord);
cuf::DataAttributeAttr dataAttr =
Fortran::lower::translateSymbolCUFDataAttribute(
builder.getContext(), sym);

View File

@ -23,34 +23,44 @@ contains
subroutine sub2()
type(ty_device), pointer :: d1
allocate(d1)
end subroutine
! CHECK-LABEL: func.func @_QMm1Psub2()
! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub2Ed1"} -> !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMm1Fsub2Ed1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
! CHECK: cuf.allocate
! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
subroutine sub3()
type(ty_device), allocatable :: d1
allocate(d1)
end subroutine
! CHECK-LABEL: func.func @_QMm1Psub3()
! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub3Ed1"} -> !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMm1Fsub3Ed1"} : (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
! CHECK: cuf.allocate
! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
subroutine sub4()
type(ty_device), allocatable :: d1(:,:)
allocate(d1(10, 10))
end subroutine
! CHECK-LABEL: func.func @_QMm1Psub4()
! CHECK: cuf.allocate
! CHECK-COUNT-2: fir.do_loop
! CHECK-COUNT-2: cuf.set_allocator_idx
end module