[flang] Use optimal shape for assign expansion as a loop. (#143050)

During `hlfir.assign` inlining and `ElementalAssignBufferization`
we can deduce the optimal shape from `lhs` and `rhs` shapes.
It is probably better be done in a separate pass that propagates
constant shapes, but I have not seen any benchmarks that would
benefit from this yet. So consider this as a workaround for a bigger
TODO issue.

The `ElementalAssignBufferization` case is from 465.tonto,
but I do not have performance results yet (I do not expect much).
This commit is contained in:
Slava Zakharin 2025-06-06 10:45:38 -07:00 committed by GitHub
parent e16f603351
commit ba8077c9dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 75 additions and 11 deletions

View File

@ -109,9 +109,14 @@ public:
builder.setInsertionPoint(assign);
rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
mlir::Value shape = hlfir::genShape(loc, builder, lhs);
mlir::Value lhsShape = hlfir::genShape(loc, builder, lhs);
llvm::SmallVector<mlir::Value> lhsExtents =
hlfir::getIndexExtents(loc, builder, lhsShape);
mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs);
llvm::SmallVector<mlir::Value> rhsExtents =
hlfir::getIndexExtents(loc, builder, rhsShape);
llvm::SmallVector<mlir::Value> extents =
hlfir::getIndexExtents(loc, builder, shape);
fir::factory::deduceOptimalExtents(lhsExtents, rhsExtents);
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
flangomp::shouldUseWorkshareLowering(assign));

View File

@ -700,10 +700,17 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
mlir::Location loc = elemental->getLoc();
fir::FirOpBuilder builder(rewriter, elemental.getOperation());
auto extents = hlfir::getIndexExtents(loc, builder, elemental.getShape());
auto rhsExtents = hlfir::getIndexExtents(loc, builder, elemental.getShape());
// create the loop at the assignment
builder.setInsertionPoint(match->assign);
hlfir::Entity lhs{match->array};
lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
mlir::Value lhsShape = hlfir::genShape(loc, builder, lhs);
llvm::SmallVector<mlir::Value> lhsExtents =
hlfir::getIndexExtents(loc, builder, lhsShape);
llvm::SmallVector<mlir::Value> extents =
fir::factory::deduceOptimalExtents(rhsExtents, lhsExtents);
// Generate a loop nest looping around the hlfir.elemental shape and clone
// hlfir.elemental region inside the inner loop
@ -717,8 +724,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
rewriter.eraseOp(yield);
// Assign the element value to the array element for this iteration.
auto arrayElement = hlfir::getElementAt(
loc, builder, hlfir::Entity{match->array}, loopNest.oneBasedIndices);
auto arrayElement =
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
builder.create<hlfir::AssignOp>(
loc, elementValue, arrayElement, /*realloc=*/false,
/*keep_lhs_length_if_realloc=*/false, match->assign.getTemporaryLhs());

View File

@ -145,18 +145,16 @@ func.func @_QPtest3(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"})
// CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_10]]#1, %[[VAL_1]] : index
// CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_12]], %[[VAL_14]] : (index, index) -> !fir.shape<2>
// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_2]]:%[[VAL_9]]#1:%[[VAL_2]], %[[VAL_2]]:%[[VAL_10]]#1:%[[VAL_2]]) shape %[[VAL_15]] : (!fir.box<!fir.array<?x?xf32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box<!fir.array<?x?xf32>>
// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_14]] step %[[VAL_2]] unordered {
// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_12]] step %[[VAL_2]] unordered {
// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_18]], %[[VAL_17]]) : (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<f32>
// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_16]] (%[[VAL_18]], %[[VAL_17]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
// CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_21]] : f32, !fir.ref<f32>
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_22:.*]]:3 = fir.box_dims %[[VAL_4]]#0, %[[VAL_1]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
// CHECK: %[[VAL_23:.*]]:3 = fir.box_dims %[[VAL_4]]#0, %[[VAL_2]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
// CHECK: fir.do_loop %[[VAL_24:.*]] = %[[VAL_2]] to %[[VAL_23]]#1 step %[[VAL_2]] unordered {
// CHECK: fir.do_loop %[[VAL_25:.*]] = %[[VAL_2]] to %[[VAL_22]]#1 step %[[VAL_2]] unordered {
// CHECK: fir.do_loop %[[VAL_24:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
// CHECK: fir.do_loop %[[VAL_25:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_25]], %[[VAL_24]]) : (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<f32>
// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_25]], %[[VAL_24]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>

View File

@ -0,0 +1,54 @@
// RUN: fir-opt --opt-bufferization %s | FileCheck %s
// Check that the elemental+assign are rewritten into a loop
// with "optimal" loop bounds, e.g. that we use constants
// when possible.
// CHECK-LABEL: func.func @_QPtest1(
// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
// CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_0]] unordered {
// CHECK-NOT: hlfir.assign{{.*}}array
func.func @_QPtest1(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<!fir.array<3xf32>> {fir.bindc_name = "y"}) {
%c0 = arith.constant 0 : index
%c3 = arith.constant 3 : index
%0 = fir.dummy_scope : !fir.dscope
%1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest1Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
%2 = fir.shape %c3 : (index) -> !fir.shape<1>
%3:2 = hlfir.declare %arg1(%2) dummy_scope %0 {uniq_name = "_QFtest1Ey"} : (!fir.ref<!fir.array<3xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<3xf32>>, !fir.ref<!fir.array<3xf32>>)
%4:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
%5 = fir.shape %4#1 : (index) -> !fir.shape<1>
%6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
^bb0(%arg2: index):
%7 = hlfir.designate %1#0 (%arg2) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
%8 = fir.load %7 : !fir.ref<f32>
%9 = arith.addf %8, %8 fastmath<contract> : f32
hlfir.yield_element %9 : f32
}
hlfir.assign %6 to %3#0 : !hlfir.expr<?xf32>, !fir.ref<!fir.array<3xf32>>
hlfir.destroy %6 : !hlfir.expr<?xf32>
return
}
// CHECK-LABEL: func.func @_QPtest2(
// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
// CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_0]] unordered {
// CHECK-NOT: hlfir.assign{{.*}}array
func.func @_QPtest2(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<!fir.array<3xf32>> {fir.bindc_name = "y"}) {
%c3 = arith.constant 3 : index
%0 = fir.dummy_scope : !fir.dscope
%1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest2Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
%2 = fir.shape %c3 : (index) -> !fir.shape<1>
%3:2 = hlfir.declare %arg1(%2) dummy_scope %0 {uniq_name = "_QFtest2Ey"} : (!fir.ref<!fir.array<3xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<3xf32>>, !fir.ref<!fir.array<3xf32>>)
%4 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
^bb0(%arg2: index):
%5 = hlfir.designate %3#0 (%arg2) : (!fir.ref<!fir.array<3xf32>>, index) -> !fir.ref<f32>
%6 = fir.load %5 : !fir.ref<f32>
%7 = arith.addf %6, %6 fastmath<contract> : f32
hlfir.yield_element %7 : f32
}
hlfir.assign %4 to %1#0 : !hlfir.expr<3xf32>, !fir.box<!fir.array<?xf32>>
hlfir.destroy %4 : !hlfir.expr<3xf32>
return
}