llvm-project/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
Kareem Ergawy 84c3b05e5e
[OpenMP][flang][MLIR] Decouple alloc, init, and copy regions for omp.private|declare_reduction ops (#125699)
This PR changes the emitted block structure of alloc, init, and copy
regions for `omp.private` and `omp.declare_reduction` ops a little bit.
In particular, this decouples init and copy regions from the alloca
insertion-point. The main motivation is fix "Instruction does not
dominate all uses!" errors that happen specially when an init region
uses a value from the OpenMP region it is being inlined into. The issue
happens because, previous to this PR, we inline the init region right
after the latest alloc block (since we used the alloca IP); which in
some cases (see exmaple below), is too early and causes the use
dominance issue.

Example that would break without this PR (when delayed privatization is
enabled for `omp.wsloop`s):
```fortran
subroutine test2 (xyz)
  integer :: i
  integer :: xyz(:)

  !$omp target map(from:xyz)
    !$omp do private(xyz)
      do i = 1, 10
        xyz(i) = i
      end do
  !$omp end target
end subroutine
```
2025-02-06 11:45:40 +01:00

206 lines
7.7 KiB
MLIR

// Test code-gen for `omp.parallel` ops with delayed privatizers (i.e. using
// `omp.private` ops).
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
llvm.func @parallel_op_firstprivate(%arg0: !llvm.ptr) {
omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
%0 = llvm.load %arg2 : !llvm.ptr -> f32
omp.terminator
}
llvm.return
}
omp.private {type = firstprivate} @x.privatizer : f32 copy {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
%0 = llvm.load %arg0 : !llvm.ptr -> f32
llvm.store %0, %arg1 : f32, !llvm.ptr
omp.yield(%arg1 : !llvm.ptr)
}
// CHECK-LABEL: @parallel_op_firstprivate
// CHECK-SAME: (ptr %[[ORIG:.*]]) {
// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr }, align 8
// CHECK: %[[ORIG_GEP:.*]] = getelementptr { ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
// CHECK: store ptr %[[ORIG]], ptr %[[ORIG_GEP]], align 8
// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_firstprivate..omp_par, ptr %[[OMP_PAR_ARG]])
// CHECK: }
// CHECK-LABEL: void @parallel_op_firstprivate..omp_par
// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[ARG]], i32 0, i32 0
// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
// Check that the privatizer alloc region was inlined properly.
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
// Check that the privatizer copy region was inlined properly.
// CHECK: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
// CHECK: store float %[[ORIG_VAL]], ptr %[[PRIV_ALLOC]], align 4
// CHECK-NEXT: br
// Check that the privatized value is used (rather than the original one).
// CHECK: load float, ptr %[[PRIV_ALLOC]], align 4
// CHECK: }
// -----
llvm.func @parallel_op_firstprivate_multi_block(%arg0: !llvm.ptr) {
omp.parallel private(@multi_block.privatizer %arg0 -> %arg2 : !llvm.ptr) {
%0 = llvm.load %arg2 : !llvm.ptr -> f32
omp.terminator
}
llvm.return
}
// CHECK-LABEL: define internal void @parallel_op_firstprivate_multi_block..omp_par
// CHECK: omp.par.entry:
// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %{{.*}}, i32 0, i32 0
// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
// CHECK-NEXT: br label %omp.region.after_alloca
// CHECK: omp.region.after_alloca:
// CHECK-NEXT: br label %[[PAR_REG:.*]]
// Check that the body of the parallel region loads from the private clone.
// CHECK: [[PAR_REG]]:
// CHECK: br label %[[PRIV_BB1:.*]]
// CHECK: [[PRIV_BB1]]:
// The 1st `alloc` block directly branches to the 2nd `alloc` block since the
// only insruction is `llvm.mlir.constant` which gets translated to compile-time
// constant in LLVM IR.
// CHECK-NEXT: br label %[[PRIV_BB2:.*]]
// CHECK: [[PRIV_BB2]]:
// CHECK-NEXT: br label %[[PRIV_BB3:.*]]
// CHECK: [[PRIV_BB3]]:
// CHECK-NEXT: br label %omp.region.cont
// CHECK: omp.region.cont:
// CHECK-NEXT: %[[PRIV_ALLOC2:.*]] = phi ptr [ %[[PRIV_ALLOC]], %[[PRIV_BB3]] ]
// CHECK-NEXT: br label %omp.private.copy
// CHECK: omp.private.copy:
// CHECK-NEXT: br label %omp.private.copy4
// CHECK: omp.private.copy4:
// CHECK-NEXT: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
// CHECK-NEXT: br label %[[PRIV_BB3:.*]]
// Check contents of the 2nd block in the `copy` region.
// CHECK: [[PRIV_BB3]]:
// CHECK-NEXT: %[[ORIG_VAL2:.*]] = phi float [ %[[ORIG_VAL]], %omp.private.copy4 ]
// CHECK-NEXT: %[[PRIV_ALLOC3:.*]] = phi ptr [ %[[PRIV_ALLOC2]], %omp.private.copy4 ]
// CHECK-NEXT: store float %[[ORIG_VAL2]], ptr %[[PRIV_ALLOC3]], align 4
// CHECK-NEXT: br label %[[PRIV_CONT:.*]]
// Check that the privatizer's continuation block yileds the private clone's
// address.
// CHECK: [[PRIV_CONT]]:
// CHECK-NEXT: %[[PRIV_ALLOC4:.*]] = phi ptr [ %[[PRIV_ALLOC3]], %[[PRIV_BB3]] ]
// CHECK: %{{.*}} = load float, ptr %[[PRIV_ALLOC2]], align 4
omp.private {type = firstprivate} @multi_block.privatizer : f32 init {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
llvm.br ^bb1
^bb1:
omp.yield(%arg1 : !llvm.ptr)
} copy {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
%0 = llvm.load %arg0 : !llvm.ptr -> f32
llvm.br ^bb1(%0, %arg1 : f32, !llvm.ptr)
^bb1(%arg2: f32, %arg3: !llvm.ptr):
llvm.store %arg2, %arg3 : f32, !llvm.ptr
omp.yield(%arg3 : !llvm.ptr)
}
// -----
// Verifies fix for https://github.com/llvm/llvm-project/issues/102935.
//
// The issue happens since we previously failed to match MLIR values to their
// corresponding LLVM values in some cases (e.g. char strings with non-const
// length).
llvm.func @non_const_len_char_test(%n: !llvm.ptr {fir.bindc_name = "n"}) {
%n_val = llvm.load %n : !llvm.ptr -> i64
%orig_alloc = llvm.alloca %n_val x i8 {bindc_name = "str"} : (i64) -> !llvm.ptr
%orig_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
%orig_val_init = llvm.insertvalue %orig_alloc, %orig_val[0] : !llvm.struct<(ptr, i64)>
omp.parallel private(@non_const_len_char %orig_val_init -> %priv_arg : !llvm.struct<(ptr, i64)>) {
%dummy = llvm.extractvalue %priv_arg[0] : !llvm.struct<(ptr, i64)>
omp.terminator
}
llvm.return
}
omp.private {type = firstprivate} @non_const_len_char : !llvm.struct<(ptr, i64)> init {
^bb0(%orig_val: !llvm.struct<(ptr, i64)>, %arg1: !llvm.struct<(ptr, i64)>):
%str_len = llvm.extractvalue %orig_val[1] : !llvm.struct<(ptr, i64)>
%priv_alloc = llvm.alloca %str_len x i8 {bindc_name = "str", pinned} : (i64) -> !llvm.ptr
%priv_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
%priv_val_init = llvm.insertvalue %priv_alloc, %priv_val[0] : !llvm.struct<(ptr, i64)>
omp.yield(%priv_val_init : !llvm.struct<(ptr, i64)>)
} copy {
^bb0(%orig_val: !llvm.struct<(ptr, i64)>, %priv_val: !llvm.struct<(ptr, i64)>):
llvm.call @foo() : () -> ()
omp.yield(%priv_val : !llvm.struct<(ptr, i64)>)
}
llvm.func @foo()
// CHECK-LABEL: @non_const_len_char_test..omp_par({{.*}})
// CHECK-NEXT: omp.par.entry:
// Verify that we found the privatizer by checking that we properly inlined the
// bodies of the alloc and copy regions.
// CHECK: %[[STR_LEN:.*]] = extractvalue { ptr, i64 } %{{.*}}, 1
// CHECK: %{{.*}} = alloca i8, i64 %[[STR_LEN]], align 1
// CHECK: call void @foo()
// -----
// Verifies fix for https://github.com/llvm/llvm-project/issues/102939.
//
// The issues occurs because the CodeExtractor component only collect inputs
// (to the parallel regions) that are defined in the same function in which the
// parallel regions is present. Howerver, this is problematic because if we are
// privatizing a global value (e.g. a `target` variable which is emitted as a
// global), then we miss finding that input and we do not privatize the
// variable.
omp.private {type = firstprivate} @global_privatizer : f32 copy {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
%0 = llvm.load %arg0 : !llvm.ptr -> f32
llvm.store %0, %arg1 : f32, !llvm.ptr
omp.yield(%arg1 : !llvm.ptr)
}
llvm.func @global_accessor() {
%global_addr = llvm.mlir.addressof @global : !llvm.ptr
omp.parallel private(@global_privatizer %global_addr -> %arg0 : !llvm.ptr) {
%1 = llvm.mlir.constant(3.140000e+00 : f32) : f32
llvm.store %1, %arg0 : f32, !llvm.ptr
omp.terminator
}
llvm.return
}
llvm.mlir.global internal @global() {addr_space = 0 : i32} : f32 {
%0 = llvm.mlir.zero : f32
llvm.return %0 : f32
}
// CHECK-LABEL: @global_accessor..omp_par({{.*}})
// CHECK-NEXT: omp.par.entry:
// Verify that we found the privatizer by checking that we properly inlined the
// bodies of the alloc and copy regions.
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
// CHECK: %[[GLOB_VAL:.*]] = load float, ptr @global, align 4
// CHECK: store float %[[GLOB_VAL]], ptr %[[PRIV_ALLOC]], align 4