Following on from the work to implement MLIR -> LLVM IR translation for taskloop, this adds support for the following clauses to be used alongside taskloop: if, grainsize, num_tasks, untied, nogroup, final, mergeable, and priority. These clauses are handled directly by the relevant OpenMP runtime functions, so their information only needed to be collected from the relevant location and passed through to the appropriate runtime function. The remaining clauses retain their TODO messages, as they have not yet been implemented.
46 lines
2.2 KiB
MLIR
46 lines
2.2 KiB
MLIR
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
|
|
|
|
// Privatizer for an i32 with `type = private`; it declares no copy region.
// Referenced by the taskloop's private clause in @_QPtest for the "i" alloca.
omp.private {type = private} @_QFtestEi_private_i32 : i32
|
|
|
|
// Privatizer for an i32 with `type = firstprivate`.
// The copy region takes two pointers: it loads an i32 through the first,
// stores that value through the second, and yields the second pointer.
// Referenced by the taskloop's private clause in @_QPtest for the "a" alloca.
omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
|
|
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
|
|
// Read the i32 pointed to by %arg0.
%0 = llvm.load %arg0 : !llvm.ptr -> i32
|
|
// Write that value to the location pointed to by %arg1.
llvm.store %0, %arg1 : i32, !llvm.ptr
|
|
omp.yield(%arg1 : !llvm.ptr)
|
|
}
|
|
|
|
|
|
// Test input: a function containing one omp.taskloop with a num_tasks clause.
// It allocates two i32 slots ("i" and "a"), initialises "a" to 20, and runs a
// loop nest from 1 to 5 (inclusive) with step 1 inside the taskloop.
llvm.func @_QPtest() {
|
|
%0 = llvm.mlir.constant(1 : i64) : i64
|
|
// Stack slot for "i"; privatized below with @_QFtestEi_private_i32.
%1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
|
|
// Stack slot for "a"; privatized below with @_QFtestEa_firstprivate_i32.
%3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
|
|
%6 = llvm.mlir.constant(20 : i32) : i32
|
|
// a = 20
llvm.store %6, %3 : i32, !llvm.ptr
|
|
// Loop lower bound (1), upper bound (5) and step (1).
%7 = llvm.mlir.constant(1 : i32) : i32
|
|
%8 = llvm.mlir.constant(5 : i32) : i32
|
|
%9 = llvm.mlir.constant(1 : i32) : i32
|
|
// Operand of the num_tasks clause.
%c2_i32 = llvm.mlir.constant(2: i32) : i32
|
|
// %arg0 is the privatized "a" (firstprivate), %arg1 the privatized "i" (private).
omp.taskloop num_tasks(%c2_i32 : i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
|
|
omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
|
|
// Store the induction variable into the private "i".
llvm.store %arg2, %arg1 : i32, !llvm.ptr
|
|
// Increment the privatized "a" by 1.
%10 = llvm.load %arg0 : !llvm.ptr -> i32
|
|
%11 = llvm.mlir.constant(1 : i32) : i32
|
|
%12 = llvm.add %10, %11 : i32
|
|
llvm.store %12, %arg0 : i32, !llvm.ptr
|
|
omp.yield
|
|
}
|
|
}
|
|
llvm.return
|
|
}
|
|
|
|
// CHECK: %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
|
|
// CHECK: %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
|
|
// CHECK: %[[VAL_1:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 32, ptr @_QPtest..omp_par)
|
|
// CHECK: %[[VAL_2:.*]] = load ptr, ptr %[[VAL_1]], align 8
|
|
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_2]], ptr align 1 %[[structArg]], i64 32, i1 false)
|
|
// CHECK: %[[VAL_3:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_2]], i32 0, i32 0
|
|
// CHECK: %[[VAL_4:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_2]], i32 0, i32 1
|
|
// CHECK: %[[VAL_5:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_2]], i32 0, i32 2
|
|
// CHECK: %[[VAL_6:.*]] = load i64, ptr %[[VAL_5]], align 4
|
|
// The thread-id argument reuses the value captured from the
// __kmpc_global_thread_num call instead of hard-coding its SSA name; the
// trailing `i32 2, i64 2` are the arguments expected for num_tasks(2).
// CHECK: call void @__kmpc_taskloop(ptr @1, i32 %[[omp_global_thread_num]], ptr %[[VAL_1]], i32 1, ptr %[[VAL_3]], ptr %[[VAL_4]], i64 %[[VAL_6]], i32 1, i32 2, i64 2, ptr @omp_taskloop_dup)
|