The OpenMPIRBuilder has a bug. Specifically, suppose you have two nested OpenMP parallel regions (written here in MLIR for ease of exposition):
```
omp.parallel {
%a = ...
omp.parallel {
use(%a)
}
}
```
As OpenMP only permits pointer-like inputs, the builder wraps all of the inputs into a stack allocation and then passes this
allocation to the inner parallel region. For example, we would want to get something like the following:
```
omp.parallel {
%a = ...
%tmp = alloc
store %tmp[] = %a
kmpc_fork(outlined, %tmp)
}
```
However, in practice, this is not what currently occurs in the context of nested parallel regions. Specifically, in the OpenMPIRBuilder,
the entirety of the function (at the LLVM level) is currently inlined, with blocks marking the corresponding start and end of each
region:
```
entry:
...
parallel1:
%a = ...
...
parallel2:
use(%a)
...
endparallel2:
...
endparallel1:
...
```
When the allocation is inserted, it is presently inserted into the parent of the entire function (e.g. entry) rather than into the parent
allocation scope of the function being outlined. If we were outlining parallel2, the correct alloca location would be parallel1.
This causes a variety of bugs, including https://github.com/llvm/llvm-project/issues/54165 as one example.
This PR allows the stack allocation to be created at the correct allocation block, and thus remedies such issues.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D121061
315 lines
21 KiB
C
315 lines
21 KiB
C
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
|
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefixes=CHECK %s
|
|
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK-DEBUG %s
|
|
|
|
// expected-no-diagnostics
|
|
|
|
// TODO: Teach the update script to check new functions too.
|
|
|
|
#ifndef HEADER
|
|
#define HEADER
|
|
|
|
// CHECK-LABEL: @_Z14parallel_for_0v(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
|
|
// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
|
|
// CHECK: omp_parallel:
|
|
// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*))
|
|
// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
|
|
// CHECK: omp.par.outlined.exit:
|
|
// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
|
// CHECK: omp.par.exit.split:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// CHECK-DEBUG-LABEL: @_Z14parallel_for_0v(
|
|
// CHECK-DEBUG-NEXT: entry:
|
|
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG13:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]]
|
|
// CHECK-DEBUG: omp_parallel:
|
|
// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), !dbg [[DBG14:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
|
|
// CHECK-DEBUG: omp.par.outlined.exit:
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
|
// CHECK-DEBUG: omp.par.exit.split:
|
|
// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG18:![0-9]+]]
|
|
//
|
|
// Test input: a single parallel region containing one worksharing loop with
// an empty body. Nothing from the enclosing function is referenced inside
// the region.
void parallel_for_0(void) {
#pragma omp parallel
  {
#pragma omp for
    for (int i = 0; i < 100; ++i) {
    }
  }
}
|
|
|
|
// CHECK-LABEL: @_Z14parallel_for_1Pfid(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[STRUCTARG17:%.*]] = alloca { i32*, double*, float** }, align 8
|
|
// CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
|
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
|
// CHECK-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
|
|
// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
|
// CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
|
|
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
|
// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
|
|
// CHECK: omp_parallel:
|
|
// CHECK-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0
|
|
// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8
|
|
// CHECK-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1
|
|
// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8
|
|
// CHECK-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2
|
|
// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8
|
|
// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG17]])
|
|
// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
|
|
// CHECK: omp.par.outlined.exit16:
|
|
// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
|
// CHECK: omp.par.exit.split:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid(
|
|
// CHECK-DEBUG-NEXT: entry:
|
|
// CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { i32*, double*, float** }, align 8
|
|
// CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
|
// CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
|
// CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
|
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
|
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG78:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]]
|
|
// CHECK-DEBUG: omp_parallel:
|
|
// CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0
|
|
// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8
|
|
// CHECK-DEBUG-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1
|
|
// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8
|
|
// CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2
|
|
// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8
|
|
// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG17]]), !dbg [[DBG79:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
|
|
// CHECK-DEBUG: omp.par.outlined.exit16:
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
|
// CHECK-DEBUG: omp.par.exit.split:
|
|
// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG81:![0-9]+]]
|
|
//
|
|
// Test input: two directly nested parallel regions. The worksharing loop in
// the inner region stores a + b through r on every iteration, so r, a and b
// are all referenced from inside both regions.
void parallel_for_1(float *r, int a, double b) {
#pragma omp parallel
  {
#pragma omp parallel
    {
#pragma omp for
      for (int i = 0; i < 100; ++i) {
        *r = a + b;
      }
    }
  }
}
|
|
|
|
// CHECK-LABEL: @_Z14parallel_for_2Pfid(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
|
|
// CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
|
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
|
// CHECK-NEXT: [[I185:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[AGG_CAPTURED186:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8
|
|
// CHECK-NEXT: [[AGG_CAPTURED187:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4
|
|
// CHECK-NEXT: [[DOTCOUNT_ADDR188:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[P_LASTITER203:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[P_LOWERBOUND204:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
|
|
// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
|
// CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
|
|
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
|
// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
|
|
// CHECK: omp_parallel:
|
|
// CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 0
|
|
// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
|
|
// CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 1
|
|
// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8
|
|
// CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2
|
|
// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8
|
|
// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]])
|
|
// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
|
|
// CHECK: omp.par.outlined.exit184:
|
|
// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
|
// CHECK: omp.par.exit.split:
|
|
// CHECK-NEXT: store i32 0, i32* [[I185]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0
|
|
// CHECK-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8
|
|
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4
|
|
// CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4
|
|
// CHECK-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]])
|
|
// CHECK-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4
|
|
// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]]
|
|
// CHECK: omp_loop.preheader190:
|
|
// CHECK-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1
|
|
// CHECK-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4
|
|
// CHECK-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4
|
|
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
|
// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0)
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4
|
|
// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4
|
|
// CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]]
|
|
// CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1
|
|
// CHECK-NEXT: br label [[OMP_LOOP_HEADER191:%.*]]
|
|
// CHECK: omp_loop.header191:
|
|
// CHECK-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ]
|
|
// CHECK-NEXT: br label [[OMP_LOOP_COND192:%.*]]
|
|
// CHECK: omp_loop.cond192:
|
|
// CHECK-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]]
|
|
// CHECK-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]]
|
|
// CHECK: omp_loop.body193:
|
|
// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]]
|
|
// CHECK-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]])
|
|
// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4
|
|
// CHECK-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double
|
|
// CHECK-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8
|
|
// CHECK-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]]
|
|
// CHECK-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float
|
|
// CHECK-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8
|
|
// CHECK-NEXT: store float [[CONV202]], float* [[TMP11]], align 4
|
|
// CHECK-NEXT: br label [[OMP_LOOP_INC194]]
|
|
// CHECK: omp_loop.inc194:
|
|
// CHECK-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1
|
|
// CHECK-NEXT: br label [[OMP_LOOP_HEADER191]]
|
|
// CHECK: omp_loop.exit195:
|
|
// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]])
|
|
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
|
// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]])
|
|
// CHECK-NEXT: br label [[OMP_LOOP_AFTER196:%.*]]
|
|
// CHECK: omp_loop.after196:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid(
|
|
// CHECK-DEBUG-NEXT: entry:
|
|
// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
|
|
// CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
|
// CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
|
// CHECK-DEBUG-NEXT: [[I185:%.*]] = alloca i32, align 4
|
|
// CHECK-DEBUG-NEXT: [[AGG_CAPTURED186:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8
|
|
// CHECK-DEBUG-NEXT: [[AGG_CAPTURED187:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4
|
|
// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR188:%.*]] = alloca i32, align 4
|
|
// CHECK-DEBUG-NEXT: [[P_LASTITER203:%.*]] = alloca i32, align 4
|
|
// CHECK-DEBUG-NEXT: [[P_LOWERBOUND204:%.*]] = alloca i32, align 4
|
|
// CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4
|
|
// CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4
|
|
// CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
|
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
|
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]]
|
|
// CHECK-DEBUG: omp_parallel:
|
|
// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 0
|
|
// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
|
|
// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 1
|
|
// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8
|
|
// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2
|
|
// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8
|
|
// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
|
|
// CHECK-DEBUG: omp.par.outlined.exit184:
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
|
// CHECK-DEBUG: omp.par.exit.split:
|
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG147]]
|
|
// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG149:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]]
|
|
// CHECK-DEBUG: omp_loop.preheader190:
|
|
// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]]
|
|
// CHECK-DEBUG: omp_loop.header191:
|
|
// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]]
|
|
// CHECK-DEBUG: omp_loop.cond192:
|
|
// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]]
|
|
// CHECK-DEBUG: omp_loop.body193:
|
|
// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG151]]
|
|
// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG150]]
|
|
// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG151]]
|
|
// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]]
|
|
// CHECK-DEBUG: omp_loop.inc194:
|
|
// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]]
|
|
// CHECK-DEBUG: omp_loop.exit195:
|
|
// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]]
|
|
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG150]]
|
|
// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]]
|
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]]
|
|
// CHECK-DEBUG: omp_loop.after196:
|
|
// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]]
|
|
//
|
|
// Test input: parallel regions nested three levels deep, with worksharing
// loops placed before, between and after the nested regions at each level.
// The final loop sits after the outermost region, i.e. outside any parallel
// region in this function. Every loop stores a + b through r.
void parallel_for_2(float *r, int a, double b) {
#pragma omp parallel
  {
#pragma omp for
    for (int i = 0; i < 100; ++i)
      *r = a + b;
#pragma omp parallel
    {
#pragma omp for
      for (int i = 0; i < 100; ++i)
        *r = a + b;
#pragma omp parallel
      {
#pragma omp for
        for (int i = 0; i < 100; ++i)
          *r = a + b;
      }
#pragma omp for
      for (int i = 0; i < 100; ++i)
        *r = a + b;
#pragma omp parallel
      {
#pragma omp for
        for (int i = 0; i < 100; ++i)
          *r = a + b;
      }
#pragma omp for
      for (int i = 0; i < 100; ++i)
        *r = a + b;
    }
#pragma omp for
    for (int i = 0; i < 100; ++i)
      *r = a + b;
  }
#pragma omp for
  for (int i = 0; i < 100; ++i)
    *r = a + b;
}
|
|
|
|
#endif
|