From aecfaf11953fe6117a15a26b81d86bb75813e09b Mon Sep 17 00:00:00 2001
From: khaki3 <47756807+khaki3@users.noreply.github.com>
Date: Tue, 24 Mar 2026 18:03:57 -0700
Subject: [PATCH] [flang][acc] Handle fir.undefined with OutlineRematerializationOpInterface in OffloadLiveInValueCanonicalization (#188325)

Example:
```fortran
!$ACC KERNELS PRESENT(CG, W1)
CG(1:W1%WDES1%NPL, NN) = W1%CPTWFP(1:W1%WDES1%NPL)
CPROJ(:, NN) = W1%CPROJ(1:SIZE(CPROJ,1))
!$ACC END KERNELS
```

When compiling OpenACC kernels containing array section assignments of
rank-2 arrays with a scalar index in one dimension (e.g. `CG(1:NPL, NN)`),
the Fortran lowering creates a `fir.slice` where collapsed (scalar)
dimensions use `fir.undefined index` as the stop/step values.
`SliceOp::getOutputRank()` relies on `getDefiningOp()` returning
`fir::UndefOp` to identify these collapsed dimensions and compute the
correct output rank.

When `fir.undefined` values defined outside an offload region are used
inside it, `gpu-kernel-outlining` turns them into function arguments.
Since function arguments have no defining op (`getDefiningOp()` returns
`nullptr`), `getOutputRank()` no longer recognizes the collapsed
dimensions, computing rank 2 instead of rank 1. This causes the
`fir.rebox` verifier to fail with:
```
'fir.rebox' op result type rank and rank after applying slice operand must match
```

Fix: Register `OutlineRematerializationOpInterface` for `fir::UndefOp`
(and `fir::SliceOp`) in `RegisterOpenACCExtensions.cpp`. This causes
`OffloadLiveInValueCanonicalization` to clone these operations inside the
offload region before outlining, preserving the `fir::UndefOp` identity so
that `getOutputRank()` correctly identifies collapsed dimensions.
---
 .../Support/RegisterOpenACCExtensions.cpp     |  4 ++
 .../offload-livein-value-canonicalization.fir | 52 +++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
index 75360ab932b0..f2fa5bf38872 100644
--- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
@@ -81,6 +81,10 @@ void registerOpenACCExtensions(mlir::DialectRegistry &registry) {
         OutlineRematerializationModel>(*ctx);
     fir::ConvertOp::attachInterface<
         OutlineRematerializationModel<fir::ConvertOp>>(*ctx);
+    fir::UndefOp::attachInterface<OutlineRematerializationModel<fir::UndefOp>>(
+        *ctx);
+    fir::SliceOp::attachInterface<OutlineRematerializationModel<fir::SliceOp>>(
+        *ctx);
   });
 
   // Register HLFIR operation interfaces
diff --git a/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir b/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir
index fa9f9c429fa0..0c661dc3b410 100644
--- a/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir
+++ b/flang/test/Fir/OpenACC/offload-livein-value-canonicalization.fir
@@ -371,3 +371,55 @@ func.func @test_convert_chain_and_direct(%arg0: !fir.boxchar<1>, %arg1: !fir.ref
 // CHECK: %[[CVT_INT:.*]] = fir.convert %arg1
 // CHECK: fir.call @use_ref(%[[DECL]])
 // CHECK: fir.call @use_i64(%[[CVT_INT]])
+
+// -----
+
+// Test fir.undefined sinking into offload region.
+// fir.undefined is used in fir.slice to mark collapsed dimensions.
+// After gpu-kernel-outlining, function arguments lose their defining op,
+// so SliceOp::getOutputRank() can no longer identify collapsed dims.
+// The pass must sink fir.undefined to preserve this information.
+
+func.func @test_undef_slice_sink(%arg0: !fir.box>) {
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  %undef = fir.undefined index
+  %slice = fir.slice %c1, %c10, %c1, %c1, %undef, %undef : (index, index, index, index, index, index) -> !fir.slice<2>
+  acc.serial {
+    %rebox = fir.rebox %arg0 [%slice] : (!fir.box>, !fir.slice<2>) -> !fir.box>
+    acc.yield
+  }
+  return
+}
+
+// CHECK-LABEL: @test_undef_slice_sink
+// CHECK: acc.serial {
+// CHECK: %[[UNDEF:.*]] = fir.undefined index
+// CHECK: %[[SLICE:.*]] = fir.slice {{.*}}, %[[UNDEF]], %[[UNDEF]]
+// CHECK: fir.rebox %arg0 [%[[SLICE]]]
+
+// -----
+
+// Test fir.undefined rematerialization (used both inside and outside region).
+
+func.func @test_undef_slice_rematerialize(%arg0: !fir.box>) {
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  %undef = fir.undefined index
+  %slice = fir.slice %c1, %c10, %c1, %c1, %undef, %undef : (index, index, index, index, index, index) -> !fir.slice<2>
+  %rebox_outer = fir.rebox %arg0 [%slice] : (!fir.box>, !fir.slice<2>) -> !fir.box>
+  acc.serial {
+    %rebox_inner = fir.rebox %arg0 [%slice] : (!fir.box>, !fir.slice<2>) -> !fir.box>
+    acc.yield
+  }
+  return
+}
+
+// CHECK-LABEL: @test_undef_slice_rematerialize
+// CHECK: %[[UNDEF_OUTER:.*]] = fir.undefined index
+// CHECK: %[[SLICE_OUTER:.*]] = fir.slice {{.*}}, %[[UNDEF_OUTER]], %[[UNDEF_OUTER]]
+// CHECK: fir.rebox %arg0 [%[[SLICE_OUTER]]]
+// CHECK: acc.serial {
+// CHECK: %[[UNDEF_INNER:.*]] = fir.undefined index
+// CHECK: %[[SLICE_INNER:.*]] = fir.slice {{.*}}, %[[UNDEF_INNER]], %[[UNDEF_INNER]]
+// CHECK: fir.rebox %arg0 [%[[SLICE_INNER]]]