From 13cd7a29e8951787e2ef68b1920d080984628baf Mon Sep 17 00:00:00 2001 From: khaki3 <47756807+khaki3@users.noreply.github.com> Date: Mon, 30 Mar 2026 06:35:08 -0700 Subject: [PATCH] [flang][OpenACC] Generalize cross-region GOTO exit handling for all ACC region ops (#187613) When a `GoTo` inside an ACC region (`acc.loop`, `acc.data`, `acc.parallel`, etc.) targets a label outside that region, the lowering generated an illegal cross-region `cf.br`. This caused MLIR verification failures or stack overflows in `runRegionDCE`'s recursive `propagateLiveness`. This patch addresses the issue with a generalized approach: - Add `genOpenACCRegionExitBranch` helper that detects cross-region branches from any ACC region op and generates the appropriate terminator (`acc.yield` for compute/loop ops, `acc.terminator` for data ops). The helper verifies that `parentOp` is an ACC operation, so it does not interfere with branches inside `scf.execute_region` or other non-ACC regions. - In `genBranch`, when a cross-region exit from an ACC region is detected, store a unique exit ID into a selector variable and generate the region terminator. After the ACC op, a jump table dispatches to the correct target based on the selector. This correctly handles GOTOs that skip intermediate code between the loop end and the target label. - Emit a TODO diagnostic for GOTOs that cross multiple nested ACC region boundaries. - Fix `acc.data` creation when the construct has no data clauses but contains unstructured control flow: skip the early return in `genACCDataOp` so the `acc.data` region is created and blocks are properly managed. --- flang/include/flang/Lower/OpenACC.h | 8 + flang/lib/Lower/Bridge.cpp | 54 +++++++ flang/lib/Lower/OpenACC.cpp | 32 +++- .../Todo/acc-goto-multi-level-exit.f90 | 44 ++++++ flang/test/Lower/OpenACC/acc-unstructured.f90 | 138 ++++++++++++++++++ 5 files changed, 275 insertions(+), 1 deletion(-) create mode 100644 flang/test/Lower/OpenACC/Todo/acc-goto-multi-level-exit.f90 diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h index c2a950f36c5a..8e0f3da22ba9 100644 --- a/flang/include/flang/Lower/OpenACC.h +++ b/flang/include/flang/Lower/OpenACC.h @@ -138,6 +138,14 @@ void setInsertionPointAfterOpenACCLoopIfInside(fir::FirOpBuilder &); void genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &, mlir::Location); +/// If \p targetBlock is outside the ACC region containing the current +/// insertion point, generate the appropriate region terminator +/// (acc.terminator or acc.yield) instead of a cross-region branch. +/// Returns true if the exit was handled, false if no ACC region boundary +/// is crossed. +bool genOpenACCRegionExitBranch(fir::FirOpBuilder &, mlir::Location, + mlir::Block *targetBlock); + /// Generates an OpenACC loop from a do construct in order to /// properly capture the loop bounds, parallelism determination mode, /// and to privatize the loop variables. diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 647bc741e7ed..5251e85d56c8 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -1609,6 +1609,20 @@ private: void genBranch(mlir::Block *targetBlock) { assert(targetBlock && "missing unconditional target block"); + if (!accRegionExitStack.empty()) { + mlir::Region *curRegion = builder->getBlock()->getParent(); + if (targetBlock->getParent() != curRegion) { + auto &exitInfo = accRegionExitStack.back(); + int exitId = exitInfo.nextId++; + exitInfo.exits.push_back({exitId, targetBlock}); + mlir::Value id = builder->createIntegerConstant( + toLocation(), builder->getI32Type(), exitId); + fir::StoreOp::create(*builder, toLocation(), id, exitInfo.selector); + Fortran::lower::genOpenACCRegionExitBranch(*builder, toLocation(), + targetBlock); + return; + } + } mlir::cf::BranchOp::create(*builder, toLocation(), targetBlock); } @@ -3575,6 +3589,19 @@ private: if (!isCacheConstruct) localSymbols.pushScope(); + // Allocate exit selector for GOTO jump table if the construct is + // unstructured (may contain GOTOs that exit the ACC region). + bool needsExitSelector = getEval().lowerAsUnstructured(); + if (needsExitSelector) { + AccRegionExitInfo exitInfo; + exitInfo.selector = + builder->createTemporary(toLocation(), builder->getI32Type()); + mlir::Value zero = builder->createIntegerConstant( + toLocation(), builder->getI32Type(), 0); + fir::StoreOp::create(*builder, toLocation(), zero, exitInfo.selector); + accRegionExitStack.push_back(std::move(exitInfo)); + } + mlir::Value exitCond = genOpenACCConstruct( *this, bridge.getSemanticsContext(), getEval(), acc, localSymbols); @@ -3677,6 +3704,25 @@ private: localSymbols.popScope(); builder->restoreInsertionPoint(insertPt); + // Generate jump table for GOTO exits from the ACC region. + if (needsExitSelector) { + auto exitInfo = accRegionExitStack.pop_back_val(); + if (!exitInfo.exits.empty()) { + mlir::Location loc = toLocation(); + mlir::Value sel = fir::LoadOp::create(*builder, loc, exitInfo.selector); + for (auto &[id, target] : exitInfo.exits) { + mlir::Value idVal = + builder->createIntegerConstant(loc, builder->getI32Type(), id); + mlir::Value cmp = mlir::arith::CmpIOp::create( + *builder, loc, mlir::arith::CmpIPredicate::eq, sel, idVal); + mlir::Block *nextBlock = + builder->getBlock()->splitBlock(builder->getBlock()->end()); + mlir::cf::CondBranchOp::create(*builder, loc, cmp, target, nextBlock); + builder->setInsertionPointToEnd(nextBlock); + } + } + } + if (accLoop && exitCond) { Fortran::lower::pft::FunctionLikeUnit *funit = getEval().getOwningProcedure(); @@ -7213,6 +7259,14 @@ private: // Stack to manage object deallocation and finalization at construct exits. llvm::SmallVector activeConstructStack; + /// Track GOTO exits from ACC regions for jump table generation. + struct AccRegionExitInfo { + mlir::Value selector{}; // alloca i32 for exit selector + llvm::SmallVector> exits; // {id, target} + int nextId = 1; + }; + llvm::SmallVector accRegionExitStack; + /// BLOCK name mangling component map int blockId = 0; Fortran::lower::mangle::ScopeBlockIdMap scopeBlockIdMap; diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index ffadef41bf17..51bc7597b5c9 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -2768,7 +2768,8 @@ static void genACCDataOp(Fortran::lower::AbstractConverter &converter, addOperands(operands, operandSegments, waitOperands); addOperands(operands, operandSegments, dataClauseOperands); - if (dataClauseOperands.empty() && !hasDefaultNone && !hasDefaultPresent) + if (dataClauseOperands.empty() && !hasDefaultNone && !hasDefaultPresent && + !eval.lowerAsUnstructured()) return; auto dataOp = createRegionOp( @@ -4566,6 +4567,35 @@ void Fortran::lower::genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &builder, mlir::acc::YieldOp::create(builder, loc, yieldValue); } +bool Fortran::lower::genOpenACCRegionExitBranch(fir::FirOpBuilder &builder, + mlir::Location loc, + mlir::Block *targetBlock) { + mlir::Block *currentBlock = builder.getBlock(); + if (!currentBlock || !targetBlock) + return false; + mlir::Region *curRegion = currentBlock->getParent(); + mlir::Region *targetRegion = targetBlock->getParent(); + if (curRegion == targetRegion) + return false; + + // Check all regions between the current region and the target for ACC ops. + for (mlir::Region *r = curRegion; r && r != targetRegion; + r = r->getParentRegion()) { + mlir::Operation *op = r->getParentOp(); + if (op && + mlir::isa(op)) { + if (targetRegion == r->getParentRegion()) { + genOpenACCTerminator(builder, op, loc); + return true; + } + TODO(loc, "GOTO exiting OpenACC region"); + } + } + + return false; +} + uint64_t Fortran::lower::getLoopCountForCollapseAndTile( const Fortran::parser::AccClauseList &clauseList) { uint64_t collapseLoopCount = getCollapseSizeAndForce(clauseList).first; diff --git a/flang/test/Lower/OpenACC/Todo/acc-goto-multi-level-exit.f90 b/flang/test/Lower/OpenACC/Todo/acc-goto-multi-level-exit.f90 new file mode 100644 index 000000000000..7f11b4a7b70e --- /dev/null +++ b/flang/test/Lower/OpenACC/Todo/acc-goto-multi-level-exit.f90 @@ -0,0 +1,44 @@ +! GOTO exits through two nested ACC regions. The branch crosses +! two ACC region boundaries, requiring multi-level exit handling. + +! RUN: split-file %s %t +! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_data.f90 -o - 2>&1 | FileCheck %s --check-prefix=DATA +! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_loop.f90 -o - 2>&1 | FileCheck %s --check-prefix=LOOP + +!--- nested_data.f90 + +subroutine nested_data_exit(a, n) + integer :: n, i, j + real :: a(*) + + !$acc data copy(a(1:n)) + !$acc data copyout(a(1:n)) + do i = 1, n + do j = 1, n + if (a(j) > 0.0) goto 888 + end do + end do + !$acc end data + !$acc end data +888 continue +end subroutine + +! DATA: not yet implemented: GOTO exiting OpenACC region + +!--- nested_loop.f90 + +subroutine nested_loop_exit(A, B, N) + implicit real*8 (a-h, o-z) + !$acc routine seq + dimension A(*), B(*) + !$acc loop seq + do 100 i = 1, N + !$acc loop seq + do 10 j = 1, N + if (A(j) .gt. B(j)) goto 200 +10 continue +100 continue +200 continue +end subroutine + +! LOOP: not yet implemented: GOTO exiting OpenACC region diff --git a/flang/test/Lower/OpenACC/acc-unstructured.f90 b/flang/test/Lower/OpenACC/acc-unstructured.f90 index bbbf8974a6c3..57b678c1200d 100644 --- a/flang/test/Lower/OpenACC/acc-unstructured.f90 +++ b/flang/test/Lower/OpenACC/acc-unstructured.f90 @@ -84,3 +84,141 @@ subroutine test_unstructured3(a, b, c) ! CHECK: acc.yield end subroutine + +! Test that acc.data is still created when there are no data clauses but the +! construct contains unstructured control flow. Without this, the early return +! in genACCDataOp skips acc.data creation, leaving orphaned blocks. +subroutine test_unstructured4(a, n) + integer :: n, i, j + real :: a(:) + logical :: use_gpu + + use_gpu = .true. + !$acc data if(use_gpu) + do i = 1, n + do j = 1, n + if (a(j) > 0.0) stop 'unstructured' + end do + end do + !$acc end data + +end subroutine + +! CHECK-LABEL: func.func @_QPtest_unstructured4 +! CHECK: acc.data if(%{{.*}}) { +! CHECK: fir.call @_FortranAStopStatementText +! CHECK: acc.terminator +! CHECK: } + +! Test that GOTO exiting acc.data (one level) generates acc.terminator +! instead of an invalid cross-region branch. +subroutine test_unstructured5(a, n) + integer :: n, i, j + real :: a(:) + logical :: use_gpu + + use_gpu = .true. + !$acc data if(use_gpu) + do i = 1, n + do j = 1, n + if (a(j) > 0.0) goto 999 + end do + end do + !$acc end data +999 continue + +end subroutine + +! CHECK-LABEL: func.func @_QPtest_unstructured5 +! CHECK: acc.data if(%{{.*}}) { +! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +! CHECK: acc.terminator +! CHECK: acc.terminator +! CHECK: } +! CHECK: arith.cmpi eq +! CHECK: cf.cond_br + +! Test that GOTO exiting acc.loop (one level) generates acc.yield +! instead of an invalid cross-region branch. +subroutine test_unstructured6(N, A, B) + implicit real*8 (a-h, o-z) + !$acc routine gang + dimension A(*), B(*) + !$acc loop gang vector + do 100 i = 1, N + !$acc loop seq + do 10 j = 1, 1000 + if (A(i) .gt. B(i)) goto 20 +10 continue +20 B(i) = A(i) +100 continue +end subroutine + +! CHECK-LABEL: func.func @_QPtest_unstructured6 +! CHECK: acc.loop gang vector +! CHECK: acc.loop +! CHECK: arith.cmpf ogt +! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +! CHECK: acc.yield +! CHECK: } attributes {seq = [#acc.device_type], unstructured} + +! Test GOTO exiting acc.loop with intermediate code between loop end and +! target. A jump table (exit selector + dispatch) skips the intermediate code. +subroutine test_unstructured7(A, B, C, N) + implicit real*8 (a-h, o-z) + !$acc routine gang + dimension A(*), B(*), C(*) + !$acc loop gang vector + do 100 i = 1, N + !$acc loop seq + do 10 j = 1, 1000 + if (A(i) .gt. B(i)) goto 20 +10 continue + C(i) = 999.0 +20 B(i) = A(i) +100 continue +end subroutine + +! CHECK-LABEL: func.func @_QPtest_unstructured7 +! CHECK: acc.loop gang vector +! Inner loop stores exit selector and yields: +! CHECK: acc.loop +! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +! CHECK: acc.yield +! CHECK: } attributes {seq = [#acc.device_type], unstructured} +! Jump table after inner loop: +! CHECK: fir.load %{{.*}} : !fir.ref +! CHECK: arith.cmpi eq +! CHECK: cf.cond_br +! Intermediate code on fall-through path: +! CHECK: arith.constant 9.990000e+02 + +! Test GOTO exiting acc.data with intermediate code. Jump table dispatches +! after the acc.data op. +subroutine test_unstructured8(a, n) + integer :: n, i, j + real :: a(:) + logical :: use_gpu + use_gpu = .true. + !$acc data if(use_gpu) + do i = 1, n + do j = 1, n + if (a(j) > 0.0) goto 999 + end do + end do + a(1) = -1.0 + !$acc end data +999 continue +end subroutine + +! CHECK-LABEL: func.func @_QPtest_unstructured8 +! Inside acc.data, GOTO stores exit selector and terminates: +! CHECK: acc.data if(%{{.*}}) { +! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref +! CHECK: acc.terminator +! CHECK: acc.terminator +! CHECK: } +! Jump table after acc.data: +! CHECK: fir.load %{{.*}} : !fir.ref +! CHECK: arith.cmpi eq +! CHECK: cf.cond_br