
Replaces https://github.com/llvm/llvm-project/pull/121886

Fixes https://github.com/llvm/llvm-project/issues/120254 (hopefully 🤞)

## Problem

Consider the following example:

```fortran
program test
  real :: x(1)
  integer :: i
  !$omp parallel do reduction(+:x)
  do i = 1,1
    x = 1
  end do
  !$omp end parallel do
end program
```

The HLFIR+OMP IR for this example looks like this:

```mlir
func.func @_QQmain() {
  ...
  omp.parallel {
    %5 = fir.embox %4#0(%3) : (!fir.ref<!fir.array<1xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<1xf32>>
    %6 = fir.alloca !fir.box<!fir.array<1xf32>>
    ...
    omp.wsloop private(@_QFEi_private_ref_i32 %1#0 -> %arg0 : !fir.ref<i32>) reduction(byref @add_reduction_byref_box_1xf32 %6 -> %arg1 : !fir.ref<!fir.box<!fir.array<1xf32>>>) {
      omp.loop_nest (%arg2) : i32 = (%c1_i32) to (%c1_i32_0) inclusive step (%c1_i32_1) {
        ...
        omp.yield
      }
    }
    omp.terminator
  }
  return
}
```

The problem addressed by this PR involves the `alloca` in the `omp.parallel` region together with the related `reduction` clause on the `omp.wsloop` op. When we try to translate the reduction from MLIR to LLVM, we have to choose an `alloca` insertion point. This happens in `convertOmpWsloop`; at entry to that function, the LLVM module looks like this:

```llvm
define void @_QQmain() {
  %tid.addr = alloca i32, align 4
  ...

entry:
  %omp_global_thread_num = call i32 @__kmpc_global_thread_num(ptr @1)
  br label %omp.par.entry

omp.par.entry:
  %tid.addr.local = alloca i32, align 4
  ...
  br label %omp.par.region

omp.par.region:
  br label %omp.par.region1

omp.par.region1:
  ...
  %5 = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
```

Now, when we choose an `alloca` insertion point for the reduction, the chosen block is `omp.par.entry` (without the changes in this PR). The problem is that the allocation needed for the reduction needs to reference the `%5` SSA value.
This results in inserting allocations in `omp.par.entry` that reference allocations in a later block, `omp.par.region1`, which causes the `Instruction does not dominate all uses!` error.

## Possible solution - take 2:

This PR contains a more localized solution than https://github.com/llvm/llvm-project/pull/121886. It makes sure that on entry to `initReductionVars`, the IR builder is at a point where we can start inserting the initialization region; to make things cleaner, we still split the builder insertion point to a dedicated `omp.reduction.init` block. This way we avoid splitting after the latest allocation block, which is what was causing the issue.
49 lines
1.5 KiB
MLIR
49 lines
1.5 KiB
MLIR
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
|
|
|
|
// Regression test for https://github.com/llvm/llvm-project/issues/120254.
|
|
|
|
// By-reference reduction declaration used by @use_reduction below. The
// combiner and cleanup regions are deliberately trivial: this test only
// cares about where the alloc and init regions end up in the LLVM IR.
omp.declare_reduction @add_reduction : !llvm.ptr
// alloc: reserve one `{ ptr }` slot for the private reduction copy.
alloc {
  %count = llvm.mlir.constant(1 : i64) : i64
  %slot = llvm.alloca %count x !llvm.struct<(ptr)> : (i64) -> !llvm.ptr
  omp.yield(%slot : !llvm.ptr)
}
// init: memcpy into the second block argument from the first one, so the
// initialization code uses both the freshly allocated slot and the value
// handed to the reduction clause.
init {
^bb0(%src: !llvm.ptr, %dst: !llvm.ptr):
  %len = llvm.mlir.constant(1 : i32) : i32
  "llvm.intr.memcpy"(%dst, %src, %len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  omp.yield(%dst : !llvm.ptr)
}
// combiner: yields its first argument unchanged.
combiner {
^bb0(%lhs: !llvm.ptr, %rhs: !llvm.ptr):
  omp.yield(%lhs : !llvm.ptr)
}
// cleanup: nothing to release.
cleanup {
^bb0(%red: !llvm.ptr):
  omp.yield
}
|
|
|
|
// Reproducer: the reduction variable is an `alloca` that lives inside the
// `omp.parallel` region (not in the function entry block) and is then named
// in the `reduction` clause of the nested `omp.wsloop`.
llvm.func @use_reduction() attributes {fir.bindc_name = "test"} {
  // Serves as lower bound, upper bound and step of the loop nest.
  %c1_i32 = llvm.mlir.constant(1 : i32) : i32

  omp.parallel {
    %c1_i64 = llvm.mlir.constant(1 : i64) : i64
    %red_var = llvm.alloca %c1_i64 x !llvm.struct<(ptr)> : (i64) -> !llvm.ptr

    omp.wsloop reduction(byref @add_reduction %red_var -> %red_arg : !llvm.ptr) {
      omp.loop_nest (%iv) : i32 = (%c1_i32) to (%c1_i32) inclusive step (%c1_i32) {
        // Empty body; only the reduction setup is under test.
        omp.yield
      }
    }

    omp.terminator
  }

  llvm.return
}
|
|
|
|
// Expected block layout of the translated parallel region: the reduction's
// own allocation sits in `omp.par.entry`, the alloca written inside the
// parallel region sits in `omp.par.region1`, and the init-region memcpy that
// uses both is emitted in the dedicated `omp.reduction.init` block, which is
// only reached after both allocations.

// CHECK: omp.par.entry:
// CHECK:   %[[RED_REGION_ALLOC:.*]] = alloca { ptr }, i64 1, align 8

// CHECK: omp.par.region:
// CHECK:   br label %omp.par.region1

// CHECK: omp.par.region1:
// CHECK:   %[[PAR_REG_VAL:.*]] = alloca { ptr }, i64 1, align 8
// CHECK:   br label %omp.reduction.init

// CHECK: omp.reduction.init:
// CHECK:   call void @llvm.memcpy{{.*}}(ptr %[[RED_REGION_ALLOC]], ptr %[[PAR_REG_VAL]], {{.*}})
|