diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp index 6e209cce07ad..6c4a07be52a4 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp @@ -109,9 +109,14 @@ public: builder.setInsertionPoint(assign); rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs); lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs); - mlir::Value shape = hlfir::genShape(loc, builder, lhs); + mlir::Value lhsShape = hlfir::genShape(loc, builder, lhs); + llvm::SmallVector lhsExtents = + hlfir::getIndexExtents(loc, builder, lhsShape); + mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs); + llvm::SmallVector rhsExtents = + hlfir::getIndexExtents(loc, builder, rhsShape); llvm::SmallVector extents = - hlfir::getIndexExtents(loc, builder, shape); + fir::factory::deduceOptimalExtents(lhsExtents, rhsExtents); hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, flangomp::shouldUseWorkshareLowering(assign)); diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index 1af6e014a818..91df8672c20d 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -700,10 +700,17 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite( mlir::Location loc = elemental->getLoc(); fir::FirOpBuilder builder(rewriter, elemental.getOperation()); - auto extents = hlfir::getIndexExtents(loc, builder, elemental.getShape()); + auto rhsExtents = hlfir::getIndexExtents(loc, builder, elemental.getShape()); // create the loop at the assignment builder.setInsertionPoint(match->assign); + hlfir::Entity lhs{match->array}; + lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs); + mlir::Value lhsShape = hlfir::genShape(loc, builder, lhs); + llvm::SmallVector lhsExtents = + hlfir::getIndexExtents(loc, builder, lhsShape); + llvm::SmallVector extents = + fir::factory::deduceOptimalExtents(rhsExtents, lhsExtents); // Generate a loop nest looping around the hlfir.elemental shape and clone // hlfir.elemental region inside the inner loop @@ -717,8 +724,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite( rewriter.eraseOp(yield); // Assign the element value to the array element for this iteration. - auto arrayElement = hlfir::getElementAt( - loc, builder, hlfir::Entity{match->array}, loopNest.oneBasedIndices); + auto arrayElement = + hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); builder.create( loc, elementValue, arrayElement, /*realloc=*/false, /*keep_lhs_length_if_realloc=*/false, match->assign.getTemporaryLhs()); diff --git a/flang/test/HLFIR/inline-hlfir-assign.fir b/flang/test/HLFIR/inline-hlfir-assign.fir index f834e7971e3d..db71720119e7 100644 --- a/flang/test/HLFIR/inline-hlfir-assign.fir +++ b/flang/test/HLFIR/inline-hlfir-assign.fir @@ -145,18 +145,16 @@ func.func @_QPtest3(%arg0: !fir.box> {fir.bindc_name = "x"}) // CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_10]]#1, %[[VAL_1]] : index // CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_12]], %[[VAL_14]] : (index, index) -> !fir.shape<2> // CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_2]]:%[[VAL_9]]#1:%[[VAL_2]], %[[VAL_2]]:%[[VAL_10]]#1:%[[VAL_2]]) shape %[[VAL_15]] : (!fir.box>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box> -// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_14]] step %[[VAL_2]] unordered { -// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_12]] step %[[VAL_2]] unordered { +// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered { +// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered { // CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_18]], %[[VAL_17]]) : (!fir.ref>, index, index) -> !fir.ref // CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref // CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_16]] (%[[VAL_18]], %[[VAL_17]]) : (!fir.box>, index, index) -> !fir.ref // CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_21]] : f32, !fir.ref // CHECK: } // CHECK: } -// CHECK: %[[VAL_22:.*]]:3 = fir.box_dims %[[VAL_4]]#0, %[[VAL_1]] : (!fir.box>, index) -> (index, index, index) -// CHECK: %[[VAL_23:.*]]:3 = fir.box_dims %[[VAL_4]]#0, %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) -// CHECK: fir.do_loop %[[VAL_24:.*]] = %[[VAL_2]] to %[[VAL_23]]#1 step %[[VAL_2]] unordered { -// CHECK: fir.do_loop %[[VAL_25:.*]] = %[[VAL_2]] to %[[VAL_22]]#1 step %[[VAL_2]] unordered { +// CHECK: fir.do_loop %[[VAL_24:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered { +// CHECK: fir.do_loop %[[VAL_25:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered { // CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_25]], %[[VAL_24]]) : (!fir.ref>, index, index) -> !fir.ref // CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref // CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_25]], %[[VAL_24]]) : (!fir.box>, index, index) -> !fir.ref diff --git a/flang/test/HLFIR/opt-bufferization-elemental-assign-shape.fir b/flang/test/HLFIR/opt-bufferization-elemental-assign-shape.fir new file mode 100644 index 000000000000..b55848225a41 --- /dev/null +++ b/flang/test/HLFIR/opt-bufferization-elemental-assign-shape.fir @@ -0,0 +1,54 @@ +// RUN: fir-opt --opt-bufferization %s | FileCheck %s + +// Check that the elemental+assign are rewritten into a loop +// with "optimal" loop bounds, e.g. that we use constants +// when possible. + +// CHECK-LABEL: func.func @_QPtest1( +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index +// CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_0]] unordered { +// CHECK-NOT: hlfir.assign{{.*}}array +func.func @_QPtest1(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref> {fir.bindc_name = "y"}) { + %c0 = arith.constant 0 : index + %c3 = arith.constant 3 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest1Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %2 = fir.shape %c3 : (index) -> !fir.shape<1> + %3:2 = hlfir.declare %arg1(%2) dummy_scope %0 {uniq_name = "_QFtest1Ey"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.ref>, !fir.ref>) + %4:3 = fir.box_dims %1#0, %c0 : (!fir.box>, index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg2: index): + %7 = hlfir.designate %1#0 (%arg2) : (!fir.box>, index) -> !fir.ref + %8 = fir.load %7 : !fir.ref + %9 = arith.addf %8, %8 fastmath : f32 + hlfir.yield_element %9 : f32 + } + hlfir.assign %6 to %3#0 : !hlfir.expr, !fir.ref> + hlfir.destroy %6 : !hlfir.expr + return +} + +// CHECK-LABEL: func.func @_QPtest2( +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index +// CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_0]] unordered { +// CHECK-NOT: hlfir.assign{{.*}}array +func.func @_QPtest2(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref> {fir.bindc_name = "y"}) { + %c3 = arith.constant 3 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %2 = fir.shape %c3 : (index) -> !fir.shape<1> + %3:2 = hlfir.declare %arg1(%2) dummy_scope %0 {uniq_name = "_QFtest2Ey"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.ref>, !fir.ref>) + %4 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> { + ^bb0(%arg2: index): + %5 = hlfir.designate %3#0 (%arg2) : (!fir.ref>, index) -> !fir.ref + %6 = fir.load %5 : !fir.ref + %7 = arith.addf %6, %6 fastmath : f32 + hlfir.yield_element %7 : f32 + } + hlfir.assign %4 to %1#0 : !hlfir.expr<3xf32>, !fir.box> + hlfir.destroy %4 : !hlfir.expr<3xf32> + return +}