[flang][cuda] Support data transfer with conversion (#153242)
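When the rhs of a device-to-host data transfer has a different type than the lhs, allocate a temporary on the host with the rhs type and first transfer the rhs into it. Then reuse the elemental op that was already created for the rhs, now reading from the temporary, to perform the conversion and assign the result to the lhs.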
This commit is contained in:
parent
6ae6c4fa4b
commit
a2899c457e
@ -62,6 +62,8 @@ cuf::DataAttributeAttr
|
||||
translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext,
|
||||
const Fortran::semantics::Symbol &sym);
|
||||
|
||||
/// Return true if \p rhs is the result of an hlfir.elemental whose body
/// contains exactly one hlfir.designate, one fir.load and one fir.convert.
/// This is the pattern used to recognize a data transfer whose right-hand
/// side needs a type conversion before being assigned.
bool isTransferWithConversion(mlir::Value rhs);
|
||||
|
||||
} // end namespace Fortran::lower
|
||||
|
||||
#endif // FORTRAN_LOWER_CUDA_H
|
||||
|
@ -4827,7 +4827,9 @@ private:
|
||||
|
||||
void genCUDADataTransfer(fir::FirOpBuilder &builder, mlir::Location loc,
|
||||
const Fortran::evaluate::Assignment &assign,
|
||||
hlfir::Entity &lhs, hlfir::Entity &rhs) {
|
||||
hlfir::Entity &lhs, hlfir::Entity &rhs,
|
||||
bool isWholeAllocatableAssignment,
|
||||
bool keepLhsLengthInAllocatableAssignment) {
|
||||
bool lhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs);
|
||||
bool rhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs);
|
||||
|
||||
@ -4892,6 +4894,28 @@ private:
|
||||
|
||||
// host = device
|
||||
if (!lhsIsDevice && rhsIsDevice) {
|
||||
if (Fortran::lower::isTransferWithConversion(rhs)) {
|
||||
mlir::OpBuilder::InsertionGuard insertionGuard(builder);
|
||||
auto elementalOp =
|
||||
mlir::dyn_cast<hlfir::ElementalOp>(rhs.getDefiningOp());
|
||||
assert(elementalOp && "expect elemental op");
|
||||
auto designateOp =
|
||||
*elementalOp.getBody()->getOps<hlfir::DesignateOp>().begin();
|
||||
builder.setInsertionPoint(elementalOp);
|
||||
// Create a temp to transfer the rhs before applying the conversion.
|
||||
hlfir::Entity entity{designateOp.getMemref()};
|
||||
auto [temp, cleanup] = hlfir::createTempFromMold(loc, builder, entity);
|
||||
auto transferKindAttr = cuf::DataTransferKindAttr::get(
|
||||
builder.getContext(), cuf::DataTransferKind::DeviceHost);
|
||||
cuf::DataTransferOp::create(builder, loc, designateOp.getMemref(), temp,
|
||||
/*shape=*/mlir::Value{}, transferKindAttr);
|
||||
designateOp.getMemrefMutable().assign(temp);
|
||||
builder.setInsertionPointAfter(elementalOp);
|
||||
hlfir::AssignOp::create(builder, loc, elementalOp, lhs,
|
||||
isWholeAllocatableAssignment,
|
||||
keepLhsLengthInAllocatableAssignment);
|
||||
return;
|
||||
}
|
||||
auto transferKindAttr = cuf::DataTransferKindAttr::get(
|
||||
builder.getContext(), cuf::DataTransferKind::DeviceHost);
|
||||
cuf::DataTransferOp::create(builder, loc, rhsVal, lhsVal, shape,
|
||||
@ -5039,7 +5063,9 @@ private:
|
||||
hlfir::Entity rhs = evaluateRhs(localStmtCtx);
|
||||
hlfir::Entity lhs = evaluateLhs(localStmtCtx);
|
||||
if (isCUDATransfer && !hasCUDAImplicitTransfer)
|
||||
genCUDADataTransfer(builder, loc, assign, lhs, rhs);
|
||||
genCUDADataTransfer(builder, loc, assign, lhs, rhs,
|
||||
isWholeAllocatableAssignment,
|
||||
keepLhsLengthInAllocatableAssignment);
|
||||
else
|
||||
hlfir::AssignOp::create(builder, loc, rhs, lhs,
|
||||
isWholeAllocatableAssignment,
|
||||
|
@ -155,3 +155,12 @@ cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute(
|
||||
Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
|
||||
return cuf::getDataAttribute(mlirContext, cudaAttr);
|
||||
}
|
||||
|
||||
/// Return true if \p rhs is produced by an hlfir.elemental whose body holds
/// exactly one hlfir.designate, one fir.load and one fir.convert, i.e. an
/// element-wise "load then convert" of a single designated entity.
///
/// Returns false for any other value, including values that have no defining
/// operation (block arguments).
bool Fortran::lower::isTransferWithConversion(mlir::Value rhs) {
  // Value::getDefiningOp<OpTy>() tolerates a null defining op, whereas
  // mlir::dyn_cast on the raw (possibly null) Operation* does not.
  auto elOp = rhs.getDefiningOp<hlfir::ElementalOp>();
  if (!elOp)
    return false;

  auto *body = elOp.getBody();
  // hasSingleElement already yields a bool; no comparison against 1 needed.
  return llvm::hasSingleElement(body->getOps<hlfir::DesignateOp>()) &&
         llvm::hasSingleElement(body->getOps<fir::LoadOp>()) &&
         llvm::hasSingleElement(body->getOps<fir::ConvertOp>());
}
|
||||
|
@ -474,3 +474,51 @@ end
|
||||
! CHECK: cuf.data_transfer %{{.*}} to %{{.*}} {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.box<!fir.array<?xf64>>
|
||||
! CHECK: hlfir.assign %{{.*}} to %{{.*}} : f64, !fir.ref<f64>
|
||||
! CHECK: fir.freemem %{{.*}} : !fir.heap<!fir.array<?xf64>>
|
||||
|
||||
! Device-to-host assignment where the device array is real(2) and the host
! array is real(4); both arrays take their extents from the dummy arguments.
subroutine sub26(i, j, k)
|
||||
integer :: i, j, k
|
||||
real(2), dimension(i,j,k), device :: d
|
||||
real(4), dimension(i,j,k) :: hd
|
||||
|
||||
hd = d
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub26
|
||||
! CHECK: %[[ALLOC_D:.*]] = cuf.alloc !fir.array<?x?x?xf16>, %{{.*}}, %{{.*}}, %{{.*}} : index, index, index {bindc_name = "d", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub26Ed"} -> !fir.ref<!fir.array<?x?x?xf16>>
|
||||
! CHECK: %[[D:.*]]:2 = hlfir.declare %[[ALLOC_D]](%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub26Ed"} : (!fir.ref<!fir.array<?x?x?xf16>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x?xf16>>, !fir.ref<!fir.array<?x?x?xf16>>)
|
||||
! CHECK: %[[HD:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub26Ehd"} : (!fir.ref<!fir.array<?x?x?xf32>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x?xf32>>, !fir.ref<!fir.array<?x?x?xf32>>)
|
||||
! CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?x?x?xf16>, %{{.*}}, %{{.*}}, %{{.*}} {bindc_name = ".tmp", uniq_name = ""}
|
||||
! CHECK: %[[TEMP:.*]]:2 = hlfir.declare %[[ALLOC]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?x?xf16>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x?xf16>>, !fir.heap<!fir.array<?x?x?xf16>>)
|
||||
! CHECK: cuf.data_transfer %[[D]]#0 to %[[TEMP]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.array<?x?x?xf16>>, !fir.box<!fir.array<?x?x?xf16>>
|
||||
! CHECK: %[[ELE:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<3>) -> !hlfir.expr<?x?x?xf32> {
|
||||
! CHECK: ^bb0(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index):
|
||||
! CHECK: %[[DESIGNATE:.*]] = hlfir.designate %[[TEMP]]#0 (%{{.*}}, %{{.*}}, %{{.*}}) : (!fir.box<!fir.array<?x?x?xf16>>, index, index, index) -> !fir.ref<f16>
|
||||
! CHECK: %[[LOAD:.*]] = fir.load %[[DESIGNATE]] : !fir.ref<f16>
|
||||
! CHECK: %[[CONV:.*]] = fir.convert %[[LOAD]] : (f16) -> f32
|
||||
! CHECK: hlfir.yield_element %[[CONV]] : f32
|
||||
! CHECK: }
|
||||
! CHECK: hlfir.assign %[[ELE]] to %[[HD]]#0 : !hlfir.expr<?x?x?xf32>, !fir.box<!fir.array<?x?x?xf32>>
|
||||
|
||||
! Device-to-host assignment where the device array is real(2) and the host
! array is real(4); both arrays have constant extents 10 x 20 x 30.
subroutine sub27()
|
||||
real(2), dimension(10, 20, 30), device :: d
|
||||
real(4), dimension(10, 20, 30) :: hd
|
||||
|
||||
hd = d
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub27()
|
||||
! CHECK: %[[ALLOC_D:.*]] = cuf.alloc !fir.array<10x20x30xf16> {bindc_name = "d", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub27Ed"} -> !fir.ref<!fir.array<10x20x30xf16>>
|
||||
! CHECK: %[[D:.*]]:2 = hlfir.declare %[[ALLOC_D]](%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub27Ed"} : (!fir.ref<!fir.array<10x20x30xf16>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x20x30xf16>>, !fir.ref<!fir.array<10x20x30xf16>>)
|
||||
! CHECK: %[[ALLOC_HD:.*]] = fir.alloca !fir.array<10x20x30xf32> {bindc_name = "hd", uniq_name = "_QFsub27Ehd"}
|
||||
! CHECK: %[[HD:.*]]:2 = hlfir.declare %[[ALLOC_HD]](%{{.*}}) {uniq_name = "_QFsub27Ehd"} : (!fir.ref<!fir.array<10x20x30xf32>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x20x30xf32>>, !fir.ref<!fir.array<10x20x30xf32>>)
|
||||
! CHECK: %[[ALLOC_TEMP:.*]] = fir.allocmem !fir.array<10x20x30xf16> {bindc_name = ".tmp", uniq_name = ""}
|
||||
! CHECK: %[[TEMP:.*]]:2 = hlfir.declare %[[ALLOC_TEMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<10x20x30xf16>>, !fir.shape<3>) -> (!fir.heap<!fir.array<10x20x30xf16>>, !fir.heap<!fir.array<10x20x30xf16>>)
|
||||
! CHECK: cuf.data_transfer %[[D]]#0 to %[[TEMP]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10x20x30xf16>>, !fir.heap<!fir.array<10x20x30xf16>>
|
||||
! CHECK: %[[ELE:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<3>) -> !hlfir.expr<10x20x30xf32> {
|
||||
! CHECK: ^bb0(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index):
|
||||
! CHECK: %[[DESIGNATE:.*]] = hlfir.designate %[[TEMP]]#0 (%{{.*}}, %{{.*}}, %{{.*}}) : (!fir.heap<!fir.array<10x20x30xf16>>, index, index, index) -> !fir.ref<f16>
|
||||
! CHECK: %[[LOAD:.*]] = fir.load %[[DESIGNATE]] : !fir.ref<f16>
|
||||
! CHECK: %[[CONV:.*]] = fir.convert %[[LOAD]] : (f16) -> f32
|
||||
! CHECK: hlfir.yield_element %[[CONV]] : f32
|
||||
! CHECK: }
|
||||
! CHECK: hlfir.assign %[[ELE]] to %[[HD]]#0 : !hlfir.expr<10x20x30xf32>, !fir.ref<!fir.array<10x20x30xf32>>
|
||||
|
Loading…
x
Reference in New Issue
Block a user