
This PR changes the emitted block structure of alloc, init, and copy regions for `omp.private` and `omp.declare_reduction` ops a little bit. In particular, this decouples init and copy regions from the alloca insertion-point. The main motivation is fix "Instruction does not dominate all uses!" errors that happen specially when an init region uses a value from the OpenMP region it is being inlined into. The issue happens because, previous to this PR, we inline the init region right after the latest alloc block (since we used the alloca IP); which in some cases (see exmaple below), is too early and causes the use dominance issue. Example that would break without this PR (when delayed privatization is enabled for `omp.wsloop`s): ```fortran subroutine test2 (xyz) integer :: i integer :: xyz(:) !$omp target map(from:xyz) !$omp do private(xyz) do i = 1, 10 xyz(i) = i end do !$omp end target end subroutine ```
206 lines
7.7 KiB
MLIR
206 lines
7.7 KiB
MLIR
// Test code-gen for `omp.parallel` ops with delayed privatizers (i.e. using
|
|
// `omp.private` ops).
|
|
|
|
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
|
|
|
|
llvm.func @parallel_op_firstprivate(%arg0: !llvm.ptr) {
|
|
omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
|
|
%0 = llvm.load %arg2 : !llvm.ptr -> f32
|
|
omp.terminator
|
|
}
|
|
llvm.return
|
|
}
|
|
|
|
omp.private {type = firstprivate} @x.privatizer : f32 copy {
|
|
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
|
|
%0 = llvm.load %arg0 : !llvm.ptr -> f32
|
|
llvm.store %0, %arg1 : f32, !llvm.ptr
|
|
omp.yield(%arg1 : !llvm.ptr)
|
|
}
|
|
|
|
// CHECK-LABEL: @parallel_op_firstprivate
|
|
// CHECK-SAME: (ptr %[[ORIG:.*]]) {
|
|
// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr }, align 8
|
|
// CHECK: %[[ORIG_GEP:.*]] = getelementptr { ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
|
|
// CHECK: store ptr %[[ORIG]], ptr %[[ORIG_GEP]], align 8
|
|
// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_firstprivate..omp_par, ptr %[[OMP_PAR_ARG]])
|
|
// CHECK: }
|
|
|
|
// CHECK-LABEL: void @parallel_op_firstprivate..omp_par
|
|
// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
|
|
// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[ARG]], i32 0, i32 0
|
|
// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
|
|
|
|
// Check that the privatizer alloc region was inlined properly.
|
|
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
|
|
|
|
// Check that the privatizer copy region was inlined properly.
|
|
|
|
// CHECK: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
|
|
// CHECK: store float %[[ORIG_VAL]], ptr %[[PRIV_ALLOC]], align 4
|
|
// CHECK-NEXT: br
|
|
|
|
// Check that the privatized value is used (rather than the original one).
|
|
// CHECK: load float, ptr %[[PRIV_ALLOC]], align 4
|
|
// CHECK: }
|
|
|
|
// -----
|
|
|
|
llvm.func @parallel_op_firstprivate_multi_block(%arg0: !llvm.ptr) {
|
|
omp.parallel private(@multi_block.privatizer %arg0 -> %arg2 : !llvm.ptr) {
|
|
%0 = llvm.load %arg2 : !llvm.ptr -> f32
|
|
omp.terminator
|
|
}
|
|
llvm.return
|
|
}
|
|
|
|
// CHECK-LABEL: define internal void @parallel_op_firstprivate_multi_block..omp_par
|
|
// CHECK: omp.par.entry:
|
|
// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %{{.*}}, i32 0, i32 0
|
|
// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
|
|
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
|
|
// CHECK-NEXT: br label %omp.region.after_alloca
|
|
|
|
// CHECK: omp.region.after_alloca:
|
|
// CHECK-NEXT: br label %[[PAR_REG:.*]]
|
|
|
|
// Check that the body of the parallel region loads from the private clone.
|
|
// CHECK: [[PAR_REG]]:
|
|
// CHECK: br label %[[PRIV_BB1:.*]]
|
|
|
|
// CHECK: [[PRIV_BB1]]:
|
|
// The 1st `alloc` block directly branches to the 2nd `alloc` block since the
|
|
// only insruction is `llvm.mlir.constant` which gets translated to compile-time
|
|
// constant in LLVM IR.
|
|
// CHECK-NEXT: br label %[[PRIV_BB2:.*]]
|
|
|
|
// CHECK: [[PRIV_BB2]]:
|
|
// CHECK-NEXT: br label %[[PRIV_BB3:.*]]
|
|
|
|
// CHECK: [[PRIV_BB3]]:
|
|
// CHECK-NEXT: br label %omp.region.cont
|
|
|
|
// CHECK: omp.region.cont:
|
|
// CHECK-NEXT: %[[PRIV_ALLOC2:.*]] = phi ptr [ %[[PRIV_ALLOC]], %[[PRIV_BB3]] ]
|
|
// CHECK-NEXT: br label %omp.private.copy
|
|
|
|
// CHECK: omp.private.copy:
|
|
// CHECK-NEXT: br label %omp.private.copy4
|
|
|
|
// CHECK: omp.private.copy4:
|
|
// CHECK-NEXT: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
|
|
// CHECK-NEXT: br label %[[PRIV_BB3:.*]]
|
|
|
|
// Check contents of the 2nd block in the `copy` region.
|
|
// CHECK: [[PRIV_BB3]]:
|
|
// CHECK-NEXT: %[[ORIG_VAL2:.*]] = phi float [ %[[ORIG_VAL]], %omp.private.copy4 ]
|
|
// CHECK-NEXT: %[[PRIV_ALLOC3:.*]] = phi ptr [ %[[PRIV_ALLOC2]], %omp.private.copy4 ]
|
|
// CHECK-NEXT: store float %[[ORIG_VAL2]], ptr %[[PRIV_ALLOC3]], align 4
|
|
// CHECK-NEXT: br label %[[PRIV_CONT:.*]]
|
|
|
|
// Check that the privatizer's continuation block yileds the private clone's
|
|
// address.
|
|
// CHECK: [[PRIV_CONT]]:
|
|
// CHECK-NEXT: %[[PRIV_ALLOC4:.*]] = phi ptr [ %[[PRIV_ALLOC3]], %[[PRIV_BB3]] ]
|
|
// CHECK: %{{.*}} = load float, ptr %[[PRIV_ALLOC2]], align 4
|
|
|
|
omp.private {type = firstprivate} @multi_block.privatizer : f32 init {
|
|
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
|
|
llvm.br ^bb1
|
|
|
|
^bb1:
|
|
omp.yield(%arg1 : !llvm.ptr)
|
|
|
|
} copy {
|
|
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
|
|
%0 = llvm.load %arg0 : !llvm.ptr -> f32
|
|
llvm.br ^bb1(%0, %arg1 : f32, !llvm.ptr)
|
|
|
|
^bb1(%arg2: f32, %arg3: !llvm.ptr):
|
|
llvm.store %arg2, %arg3 : f32, !llvm.ptr
|
|
omp.yield(%arg3 : !llvm.ptr)
|
|
}
|
|
|
|
// -----
|
|
|
|
// Verifies fix for https://github.com/llvm/llvm-project/issues/102935.
|
|
//
|
|
// The issue happens since we previously failed to match MLIR values to their
|
|
// corresponding LLVM values in some cases (e.g. char strings with non-const
|
|
// length).
|
|
llvm.func @non_const_len_char_test(%n: !llvm.ptr {fir.bindc_name = "n"}) {
|
|
%n_val = llvm.load %n : !llvm.ptr -> i64
|
|
%orig_alloc = llvm.alloca %n_val x i8 {bindc_name = "str"} : (i64) -> !llvm.ptr
|
|
%orig_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
|
|
%orig_val_init = llvm.insertvalue %orig_alloc, %orig_val[0] : !llvm.struct<(ptr, i64)>
|
|
omp.parallel private(@non_const_len_char %orig_val_init -> %priv_arg : !llvm.struct<(ptr, i64)>) {
|
|
%dummy = llvm.extractvalue %priv_arg[0] : !llvm.struct<(ptr, i64)>
|
|
omp.terminator
|
|
}
|
|
llvm.return
|
|
}
|
|
|
|
omp.private {type = firstprivate} @non_const_len_char : !llvm.struct<(ptr, i64)> init {
|
|
^bb0(%orig_val: !llvm.struct<(ptr, i64)>, %arg1: !llvm.struct<(ptr, i64)>):
|
|
%str_len = llvm.extractvalue %orig_val[1] : !llvm.struct<(ptr, i64)>
|
|
%priv_alloc = llvm.alloca %str_len x i8 {bindc_name = "str", pinned} : (i64) -> !llvm.ptr
|
|
%priv_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
|
|
%priv_val_init = llvm.insertvalue %priv_alloc, %priv_val[0] : !llvm.struct<(ptr, i64)>
|
|
omp.yield(%priv_val_init : !llvm.struct<(ptr, i64)>)
|
|
} copy {
|
|
^bb0(%orig_val: !llvm.struct<(ptr, i64)>, %priv_val: !llvm.struct<(ptr, i64)>):
|
|
llvm.call @foo() : () -> ()
|
|
omp.yield(%priv_val : !llvm.struct<(ptr, i64)>)
|
|
}
|
|
|
|
llvm.func @foo()
|
|
|
|
// CHECK-LABEL: @non_const_len_char_test..omp_par({{.*}})
|
|
// CHECK-NEXT: omp.par.entry:
|
|
// Verify that we found the privatizer by checking that we properly inlined the
|
|
// bodies of the alloc and copy regions.
|
|
// CHECK: %[[STR_LEN:.*]] = extractvalue { ptr, i64 } %{{.*}}, 1
|
|
// CHECK: %{{.*}} = alloca i8, i64 %[[STR_LEN]], align 1
|
|
// CHECK: call void @foo()
|
|
|
|
// -----
|
|
|
|
// Verifies fix for https://github.com/llvm/llvm-project/issues/102939.
|
|
//
|
|
// The issues occurs because the CodeExtractor component only collect inputs
|
|
// (to the parallel regions) that are defined in the same function in which the
|
|
// parallel regions is present. Howerver, this is problematic because if we are
|
|
// privatizing a global value (e.g. a `target` variable which is emitted as a
|
|
// global), then we miss finding that input and we do not privatize the
|
|
// variable.
|
|
|
|
omp.private {type = firstprivate} @global_privatizer : f32 copy {
|
|
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
|
|
%0 = llvm.load %arg0 : !llvm.ptr -> f32
|
|
llvm.store %0, %arg1 : f32, !llvm.ptr
|
|
omp.yield(%arg1 : !llvm.ptr)
|
|
}
|
|
|
|
llvm.func @global_accessor() {
|
|
%global_addr = llvm.mlir.addressof @global : !llvm.ptr
|
|
omp.parallel private(@global_privatizer %global_addr -> %arg0 : !llvm.ptr) {
|
|
%1 = llvm.mlir.constant(3.140000e+00 : f32) : f32
|
|
llvm.store %1, %arg0 : f32, !llvm.ptr
|
|
omp.terminator
|
|
}
|
|
llvm.return
|
|
}
|
|
|
|
llvm.mlir.global internal @global() {addr_space = 0 : i32} : f32 {
|
|
%0 = llvm.mlir.zero : f32
|
|
llvm.return %0 : f32
|
|
}
|
|
|
|
// CHECK-LABEL: @global_accessor..omp_par({{.*}})
|
|
// CHECK-NEXT: omp.par.entry:
|
|
// Verify that we found the privatizer by checking that we properly inlined the
|
|
// bodies of the alloc and copy regions.
|
|
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
|
|
// CHECK: %[[GLOB_VAL:.*]] = load float, ptr @global, align 4
|
|
// CHECK: store float %[[GLOB_VAL]], ptr %[[PRIV_ALLOC]], align 4
|