From a224ba068997785daa237eaabb3d11867b898f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Wed, 25 Feb 2026 10:44:31 -0800 Subject: [PATCH] [flang][cuda] Support data transfer with parenthesis around rhs (#183201) --- flang/include/flang/Lower/CUDA.h | 3 +- flang/lib/Lower/Bridge.cpp | 11 +++---- flang/lib/Lower/CUDA.cpp | 30 +++++++++++++++++--- flang/test/Lower/CUDA/cuda-data-transfer.cuf | 13 +++++++++ 4 files changed, 47 insertions(+), 10 deletions(-) diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h index 704b0356c19e..865a7c6a6fa7 100644 --- a/flang/include/flang/Lower/CUDA.h +++ b/flang/include/flang/Lower/CUDA.h @@ -64,7 +64,8 @@ translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext, /// Check if the rhs has an implicit conversion. Return the elemental op if /// there is a conversion. Return null otherwise. -hlfir::ElementalOp isTransferWithConversion(mlir::Value rhs); +std::pair +isTransferWithConversion(mlir::Value rhs); /// Check if the value is an allocatable with double descriptor. bool hasDoubleDescriptor(mlir::Value); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 687c2f0f4a42..a8f405dd03d1 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -5436,11 +5436,12 @@ private: // host = device if (!lhsIsDevice && rhsIsDevice) { - if (auto elementalOp = Fortran::lower::isTransferWithConversion(rhs)) { + auto [firstElOp, elOp] = Fortran::lower::isTransferWithConversion(rhs); + if (firstElOp) { mlir::OpBuilder::InsertionGuard insertionGuard(builder); auto designateOp = - *elementalOp.getBody()->getOps().begin(); - builder.setInsertionPoint(elementalOp); + *firstElOp.getBody()->getOps().begin(); + builder.setInsertionPoint(firstElOp); // Create a temp to transfer the rhs before applying the conversion. hlfir::Entity entity{designateOp.getMemref()}; auto [temp, cleanup] = hlfir::createTempFromMold(loc, builder, entity); @@ -5449,8 +5450,8 @@ private: cuf::DataTransferOp::create(builder, loc, designateOp.getMemref(), temp, /*shape=*/mlir::Value{}, transferKindAttr); designateOp.getMemrefMutable().assign(temp); - builder.setInsertionPointAfter(elementalOp); - hlfir::AssignOp::create(builder, loc, elementalOp, lhs, + builder.setInsertionPointAfter(elOp); + hlfir::AssignOp::create(builder, loc, elOp, lhs, isWholeAllocatableAssignment, keepLhsLengthInAllocatableAssignment); return; diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp index fb055286df46..d8e2d829f9ad 100644 --- a/flang/lib/Lower/CUDA.cpp +++ b/flang/lib/Lower/CUDA.cpp @@ -68,7 +68,15 @@ cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute( return cuf::getDataAttribute(mlirContext, cudaAttr); } -hlfir::ElementalOp Fortran::lower::isTransferWithConversion(mlir::Value rhs) { +std::pair +Fortran::lower::isTransferWithConversion(mlir::Value rhs) { + auto isCopyElementalOp = [](hlfir::ElementalOp elOp) { + return llvm::hasSingleElement( + elOp.getBody()->getOps()) && + llvm::hasSingleElement(elOp.getBody()->getOps()) == 1 && + llvm::hasSingleElement( + elOp.getBody()->getOps()) == 1; + }; auto isConversionElementalOp = [](hlfir::ElementalOp elOp) { return llvm::hasSingleElement( elOp.getBody()->getOps()) && @@ -76,6 +84,11 @@ hlfir::ElementalOp Fortran::lower::isTransferWithConversion(mlir::Value rhs) { llvm::hasSingleElement(elOp.getBody()->getOps()) == 1; }; + auto isConversionFromCopyElementalOp = [](hlfir::ElementalOp elOp) { + return llvm::hasSingleElement(elOp.getBody()->getOps()) && + llvm::hasSingleElement(elOp.getBody()->getOps()) == + 1; + }; if (auto declOp = mlir::dyn_cast(rhs.getDefiningOp())) { if (!declOp.getMemref().getDefiningOp()) return {}; @@ -84,11 +97,20 @@ hlfir::ElementalOp Fortran::lower::isTransferWithConversion(mlir::Value rhs) { if (auto elOp = mlir::dyn_cast( associateOp.getSource().getDefiningOp())) if (isConversionElementalOp(elOp)) - return elOp; + return {elOp, elOp}; } - if (auto elOp = mlir::dyn_cast(rhs.getDefiningOp())) + if (auto elOp = mlir::dyn_cast(rhs.getDefiningOp())) { + if (isConversionFromCopyElementalOp(elOp)) { + auto applyOp = *elOp.getBody()->getOps().begin(); + if (auto firstElOp = mlir::dyn_cast( + applyOp.getExpr().getDefiningOp())) { + if (isCopyElementalOp(firstElOp)) + return {firstElOp, elOp}; + } + } if (isConversionElementalOp(elOp)) - return elOp; + return {elOp, elOp}; + } return {}; } diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index 9da766157f7e..866a63abd36d 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -616,3 +616,16 @@ end subroutine ! CHECK-LABEL: func.func @_QPsub32() ! CHECK-COUNT-2: cuf.data_transfer + +subroutine sub33(m, n) + integer :: m, n + real(2), managed :: dc(m,n) + real(4) :: c(m,n) + + c = (dc) +end subroutine + +! CHECK-LABEL: func.func @_QPsub33 +! CHECK: cuf.data_transfer +! CHECK-COUNT-2: hlfir.elemental +! CHECK: hlfir.assign