[flang][OpenACC] Generalize cross-region GOTO exit handling for all ACC region ops (#187613)
When a `GoTo` inside an ACC region (`acc.loop`, `acc.data`, `acc.parallel`, etc.) targets a label outside that region, the lowering generated an illegal cross-region `cf.br`. This caused MLIR verification failures or stack overflows in `runRegionDCE`'s recursive `propagateLiveness`. This patch addresses the issue with a generalized approach: - Add `genOpenACCRegionExitBranch` helper that detects cross-region branches from any ACC region op and generates the appropriate terminator (`acc.yield` for compute/loop ops, `acc.terminator` for data ops). The helper verifies that `parentOp` is an ACC operation, so it does not interfere with branches inside `scf.execute_region` or other non-ACC regions. - In `genBranch`, when a cross-region exit from an ACC region is detected, store a unique exit ID into a selector variable and generate the region terminator. After the ACC op, a jump table dispatches to the correct target based on the selector. This correctly handles GOTOs that skip intermediate code between the loop end and the target label. - Emit a TODO diagnostic for GOTOs that cross multiple nested ACC region boundaries. - Fix `acc.data` creation when the construct has no data clauses but contains unstructured control flow: skip the early return in `genACCDataOp` so the `acc.data` region is created and blocks are properly managed.
This commit is contained in:
parent
1e99c9e4c7
commit
13cd7a29e8
@ -138,6 +138,14 @@ void setInsertionPointAfterOpenACCLoopIfInside(fir::FirOpBuilder &);
|
||||
|
||||
void genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &, mlir::Location);
|
||||
|
||||
/// If \p targetBlock is outside the ACC region containing the current
|
||||
/// insertion point, generate the appropriate region terminator
|
||||
/// (acc.terminator or acc.yield) instead of a cross-region branch.
|
||||
/// Returns true if the exit was handled, false if no ACC region boundary
|
||||
/// is crossed.
|
||||
bool genOpenACCRegionExitBranch(fir::FirOpBuilder &, mlir::Location,
|
||||
mlir::Block *targetBlock);
|
||||
|
||||
/// Generates an OpenACC loop from a do construct in order to
|
||||
/// properly capture the loop bounds, parallelism determination mode,
|
||||
/// and to privatize the loop variables.
|
||||
|
||||
@ -1609,6 +1609,20 @@ private:
|
||||
|
||||
void genBranch(mlir::Block *targetBlock) {
|
||||
assert(targetBlock && "missing unconditional target block");
|
||||
if (!accRegionExitStack.empty()) {
|
||||
mlir::Region *curRegion = builder->getBlock()->getParent();
|
||||
if (targetBlock->getParent() != curRegion) {
|
||||
auto &exitInfo = accRegionExitStack.back();
|
||||
int exitId = exitInfo.nextId++;
|
||||
exitInfo.exits.push_back({exitId, targetBlock});
|
||||
mlir::Value id = builder->createIntegerConstant(
|
||||
toLocation(), builder->getI32Type(), exitId);
|
||||
fir::StoreOp::create(*builder, toLocation(), id, exitInfo.selector);
|
||||
Fortran::lower::genOpenACCRegionExitBranch(*builder, toLocation(),
|
||||
targetBlock);
|
||||
return;
|
||||
}
|
||||
}
|
||||
mlir::cf::BranchOp::create(*builder, toLocation(), targetBlock);
|
||||
}
|
||||
|
||||
@ -3575,6 +3589,19 @@ private:
|
||||
|
||||
if (!isCacheConstruct)
|
||||
localSymbols.pushScope();
|
||||
// Allocate exit selector for GOTO jump table if the construct is
|
||||
// unstructured (may contain GOTOs that exit the ACC region).
|
||||
bool needsExitSelector = getEval().lowerAsUnstructured();
|
||||
if (needsExitSelector) {
|
||||
AccRegionExitInfo exitInfo;
|
||||
exitInfo.selector =
|
||||
builder->createTemporary(toLocation(), builder->getI32Type());
|
||||
mlir::Value zero = builder->createIntegerConstant(
|
||||
toLocation(), builder->getI32Type(), 0);
|
||||
fir::StoreOp::create(*builder, toLocation(), zero, exitInfo.selector);
|
||||
accRegionExitStack.push_back(std::move(exitInfo));
|
||||
}
|
||||
|
||||
mlir::Value exitCond = genOpenACCConstruct(
|
||||
*this, bridge.getSemanticsContext(), getEval(), acc, localSymbols);
|
||||
|
||||
@ -3677,6 +3704,25 @@ private:
|
||||
localSymbols.popScope();
|
||||
builder->restoreInsertionPoint(insertPt);
|
||||
|
||||
// Generate jump table for GOTO exits from the ACC region.
|
||||
if (needsExitSelector) {
|
||||
auto exitInfo = accRegionExitStack.pop_back_val();
|
||||
if (!exitInfo.exits.empty()) {
|
||||
mlir::Location loc = toLocation();
|
||||
mlir::Value sel = fir::LoadOp::create(*builder, loc, exitInfo.selector);
|
||||
for (auto &[id, target] : exitInfo.exits) {
|
||||
mlir::Value idVal =
|
||||
builder->createIntegerConstant(loc, builder->getI32Type(), id);
|
||||
mlir::Value cmp = mlir::arith::CmpIOp::create(
|
||||
*builder, loc, mlir::arith::CmpIPredicate::eq, sel, idVal);
|
||||
mlir::Block *nextBlock =
|
||||
builder->getBlock()->splitBlock(builder->getBlock()->end());
|
||||
mlir::cf::CondBranchOp::create(*builder, loc, cmp, target, nextBlock);
|
||||
builder->setInsertionPointToEnd(nextBlock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (accLoop && exitCond) {
|
||||
Fortran::lower::pft::FunctionLikeUnit *funit =
|
||||
getEval().getOwningProcedure();
|
||||
@ -7213,6 +7259,14 @@ private:
|
||||
// Stack to manage object deallocation and finalization at construct exits.
|
||||
llvm::SmallVector<ConstructContext> activeConstructStack;
|
||||
|
||||
/// Track GOTO exits from ACC regions for jump table generation.
|
||||
struct AccRegionExitInfo {
|
||||
mlir::Value selector{}; // alloca i32 for exit selector
|
||||
llvm::SmallVector<std::pair<int, mlir::Block *>> exits; // {id, target}
|
||||
int nextId = 1;
|
||||
};
|
||||
llvm::SmallVector<AccRegionExitInfo> accRegionExitStack;
|
||||
|
||||
/// BLOCK name mangling component map
|
||||
int blockId = 0;
|
||||
Fortran::lower::mangle::ScopeBlockIdMap scopeBlockIdMap;
|
||||
|
||||
@ -2768,7 +2768,8 @@ static void genACCDataOp(Fortran::lower::AbstractConverter &converter,
|
||||
addOperands(operands, operandSegments, waitOperands);
|
||||
addOperands(operands, operandSegments, dataClauseOperands);
|
||||
|
||||
if (dataClauseOperands.empty() && !hasDefaultNone && !hasDefaultPresent)
|
||||
if (dataClauseOperands.empty() && !hasDefaultNone && !hasDefaultPresent &&
|
||||
!eval.lowerAsUnstructured())
|
||||
return;
|
||||
|
||||
auto dataOp = createRegionOp<mlir::acc::DataOp, mlir::acc::TerminatorOp>(
|
||||
@ -4566,6 +4567,35 @@ void Fortran::lower::genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &builder,
|
||||
mlir::acc::YieldOp::create(builder, loc, yieldValue);
|
||||
}
|
||||
|
||||
bool Fortran::lower::genOpenACCRegionExitBranch(fir::FirOpBuilder &builder,
|
||||
mlir::Location loc,
|
||||
mlir::Block *targetBlock) {
|
||||
mlir::Block *currentBlock = builder.getBlock();
|
||||
if (!currentBlock || !targetBlock)
|
||||
return false;
|
||||
mlir::Region *curRegion = currentBlock->getParent();
|
||||
mlir::Region *targetRegion = targetBlock->getParent();
|
||||
if (curRegion == targetRegion)
|
||||
return false;
|
||||
|
||||
// Check all regions between the current region and the target for ACC ops.
|
||||
for (mlir::Region *r = curRegion; r && r != targetRegion;
|
||||
r = r->getParentRegion()) {
|
||||
mlir::Operation *op = r->getParentOp();
|
||||
if (op &&
|
||||
mlir::isa<ACC_COMPUTE_CONSTRUCT_OPS, ACC_DATA_CONSTRUCT_STRUCTURED_OPS,
|
||||
mlir::acc::LoopOp>(op)) {
|
||||
if (targetRegion == r->getParentRegion()) {
|
||||
genOpenACCTerminator(builder, op, loc);
|
||||
return true;
|
||||
}
|
||||
TODO(loc, "GOTO exiting OpenACC region");
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t Fortran::lower::getLoopCountForCollapseAndTile(
|
||||
const Fortran::parser::AccClauseList &clauseList) {
|
||||
uint64_t collapseLoopCount = getCollapseSizeAndForce(clauseList).first;
|
||||
|
||||
44
flang/test/Lower/OpenACC/Todo/acc-goto-multi-level-exit.f90
Normal file
44
flang/test/Lower/OpenACC/Todo/acc-goto-multi-level-exit.f90
Normal file
@ -0,0 +1,44 @@
|
||||
! GOTO exits through two nested ACC regions. The branch crosses
|
||||
! two ACC region boundaries, requiring multi-level exit handling.
|
||||
|
||||
! RUN: split-file %s %t
|
||||
! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_data.f90 -o - 2>&1 | FileCheck %s --check-prefix=DATA
|
||||
! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_loop.f90 -o - 2>&1 | FileCheck %s --check-prefix=LOOP
|
||||
|
||||
!--- nested_data.f90
|
||||
|
||||
subroutine nested_data_exit(a, n)
|
||||
integer :: n, i, j
|
||||
real :: a(*)
|
||||
|
||||
!$acc data copy(a(1:n))
|
||||
!$acc data copyout(a(1:n))
|
||||
do i = 1, n
|
||||
do j = 1, n
|
||||
if (a(j) > 0.0) goto 888
|
||||
end do
|
||||
end do
|
||||
!$acc end data
|
||||
!$acc end data
|
||||
888 continue
|
||||
end subroutine
|
||||
|
||||
! DATA: not yet implemented: GOTO exiting OpenACC region
|
||||
|
||||
!--- nested_loop.f90
|
||||
|
||||
subroutine nested_loop_exit(A, B, N)
|
||||
implicit real*8 (a-h, o-z)
|
||||
!$acc routine seq
|
||||
dimension A(*), B(*)
|
||||
!$acc loop seq
|
||||
do 100 i = 1, N
|
||||
!$acc loop seq
|
||||
do 10 j = 1, N
|
||||
if (A(j) .gt. B(j)) goto 200
|
||||
10 continue
|
||||
100 continue
|
||||
200 continue
|
||||
end subroutine
|
||||
|
||||
! LOOP: not yet implemented: GOTO exiting OpenACC region
|
||||
@ -84,3 +84,141 @@ subroutine test_unstructured3(a, b, c)
|
||||
! CHECK: acc.yield
|
||||
|
||||
end subroutine
|
||||
|
||||
! Test that acc.data is still created when there are no data clauses but the
|
||||
! construct contains unstructured control flow. Without this, the early return
|
||||
! in genACCDataOp skips acc.data creation, leaving orphaned blocks.
|
||||
subroutine test_unstructured4(a, n)
|
||||
integer :: n, i, j
|
||||
real :: a(:)
|
||||
logical :: use_gpu
|
||||
|
||||
use_gpu = .true.
|
||||
!$acc data if(use_gpu)
|
||||
do i = 1, n
|
||||
do j = 1, n
|
||||
if (a(j) > 0.0) stop 'unstructured'
|
||||
end do
|
||||
end do
|
||||
!$acc end data
|
||||
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPtest_unstructured4
|
||||
! CHECK: acc.data if(%{{.*}}) {
|
||||
! CHECK: fir.call @_FortranAStopStatementText
|
||||
! CHECK: acc.terminator
|
||||
! CHECK: }
|
||||
|
||||
! Test that GOTO exiting acc.data (one level) generates acc.terminator
|
||||
! instead of an invalid cross-region branch.
|
||||
subroutine test_unstructured5(a, n)
|
||||
integer :: n, i, j
|
||||
real :: a(:)
|
||||
logical :: use_gpu
|
||||
|
||||
use_gpu = .true.
|
||||
!$acc data if(use_gpu)
|
||||
do i = 1, n
|
||||
do j = 1, n
|
||||
if (a(j) > 0.0) goto 999
|
||||
end do
|
||||
end do
|
||||
!$acc end data
|
||||
999 continue
|
||||
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPtest_unstructured5
|
||||
! CHECK: acc.data if(%{{.*}}) {
|
||||
! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
|
||||
! CHECK: acc.terminator
|
||||
! CHECK: acc.terminator
|
||||
! CHECK: }
|
||||
! CHECK: arith.cmpi eq
|
||||
! CHECK: cf.cond_br
|
||||
|
||||
! Test that GOTO exiting acc.loop (one level) generates acc.yield
|
||||
! instead of an invalid cross-region branch.
|
||||
subroutine test_unstructured6(N, A, B)
|
||||
implicit real*8 (a-h, o-z)
|
||||
!$acc routine gang
|
||||
dimension A(*), B(*)
|
||||
!$acc loop gang vector
|
||||
do 100 i = 1, N
|
||||
!$acc loop seq
|
||||
do 10 j = 1, 1000
|
||||
if (A(i) .gt. B(i)) goto 20
|
||||
10 continue
|
||||
20 B(i) = A(i)
|
||||
100 continue
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPtest_unstructured6
|
||||
! CHECK: acc.loop gang vector
|
||||
! CHECK: acc.loop
|
||||
! CHECK: arith.cmpf ogt
|
||||
! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
|
||||
! CHECK: acc.yield
|
||||
! CHECK: } attributes {seq = [#acc.device_type<none>], unstructured}
|
||||
|
||||
! Test GOTO exiting acc.loop with intermediate code between loop end and
|
||||
! target. A jump table (exit selector + dispatch) skips the intermediate code.
|
||||
subroutine test_unstructured7(A, B, C, N)
|
||||
implicit real*8 (a-h, o-z)
|
||||
!$acc routine gang
|
||||
dimension A(*), B(*), C(*)
|
||||
!$acc loop gang vector
|
||||
do 100 i = 1, N
|
||||
!$acc loop seq
|
||||
do 10 j = 1, 1000
|
||||
if (A(i) .gt. B(i)) goto 20
|
||||
10 continue
|
||||
C(i) = 999.0
|
||||
20 B(i) = A(i)
|
||||
100 continue
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPtest_unstructured7
|
||||
! CHECK: acc.loop gang vector
|
||||
! Inner loop stores exit selector and yields:
|
||||
! CHECK: acc.loop
|
||||
! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
|
||||
! CHECK: acc.yield
|
||||
! CHECK: } attributes {seq = [#acc.device_type<none>], unstructured}
|
||||
! Jump table after inner loop:
|
||||
! CHECK: fir.load %{{.*}} : !fir.ref<i32>
|
||||
! CHECK: arith.cmpi eq
|
||||
! CHECK: cf.cond_br
|
||||
! Intermediate code on fall-through path:
|
||||
! CHECK: arith.constant 9.990000e+02
|
||||
|
||||
! Test GOTO exiting acc.data with intermediate code. Jump table dispatches
|
||||
! after the acc.data op.
|
||||
subroutine test_unstructured8(a, n)
|
||||
integer :: n, i, j
|
||||
real :: a(:)
|
||||
logical :: use_gpu
|
||||
use_gpu = .true.
|
||||
!$acc data if(use_gpu)
|
||||
do i = 1, n
|
||||
do j = 1, n
|
||||
if (a(j) > 0.0) goto 999
|
||||
end do
|
||||
end do
|
||||
a(1) = -1.0
|
||||
!$acc end data
|
||||
999 continue
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPtest_unstructured8
|
||||
! Inside acc.data, GOTO stores exit selector and terminates:
|
||||
! CHECK: acc.data if(%{{.*}}) {
|
||||
! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
|
||||
! CHECK: acc.terminator
|
||||
! CHECK: acc.terminator
|
||||
! CHECK: }
|
||||
! Jump table after acc.data:
|
||||
! CHECK: fir.load %{{.*}} : !fir.ref<i32>
|
||||
! CHECK: arith.cmpi eq
|
||||
! CHECK: cf.cond_br
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user