Parallel regions are outlined as functions with capture variables explicitly generated as distinct parameters in the function's argument list. That complicates the fork_call interface in the OpenMP runtime: (1) the fork_call is variadic since there is a variable number of arguments to forward to the outlined function, (2) wrapping/unwrapping arguments happens in the OpenMP runtime, which is sub-optimal, has been a source of ABI bugs, and has a hardcoded limit (16) in the number of arguments, (3) forwarded arguments must cast to pointer types, which complicates debugging. This patch avoids those issues by aggregating captured arguments in a struct to pass to the fork_call. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D102107
154 lines
13 KiB
C++
154 lines
13 KiB
C++
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
|
// RUN: %clang_cc1 -triple x86_64-unknown-linux -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1
|
|
|
|
// RUN: %clang_cc1 -triple x86_64-unknown-linux -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
|
|
// expected-no-diagnostics
|
|
|
|
void f(int m) {
|
|
int i;
|
|
int cen[m];
|
|
#pragma omp parallel for
|
|
for (i = 0; i < m; ++i) {
|
|
cen[i] = i;
|
|
}
|
|
}
|
|
|
|
// CHECK1-LABEL: define {{[^@]+}}@_Z1fi
|
|
// CHECK1-SAME: (i32 [[M:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
|
|
// CHECK1-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[M_ADDR]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[M_ADDR]], align 4, !dbg [[DBG15:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG16:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG16]]
|
|
// CHECK1-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG16]]
|
|
// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG16]]
|
|
// CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG16]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG25:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[M_ADDR]], i32** [[TMP3]], align 8, !dbg [[DBG25]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG25]]
|
|
// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP4]], align 8, !dbg [[DBG25]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG25]]
|
|
// CHECK1-NEXT: store i32* [[VLA]], i32** [[TMP5]], align 8, !dbg [[DBG25]]
|
|
// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG25]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG26:![0-9]+]]
|
|
// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP6]]), !dbg [[DBG26]]
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG26]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
|
|
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG27:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META38:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39]]
|
|
// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata %struct.anon** [[__CONTEXT_ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG42:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0, !dbg [[DBG42]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8, !dbg [[DBG42]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1, !dbg [[DBG42]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8, !dbg [[DBG42]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2, !dbg [[DBG42]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8, !dbg [[DBG42]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTCAPTURE_EXPR_]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG45:![0-9]+]]
|
|
// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTCAPTURE_EXPR_1]], metadata [[META44]], metadata !DIExpression()), !dbg [[DBG39]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0, !dbg [[DBG46:![0-9]+]]
|
|
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39]]
|
|
// CHECK1-NEXT: store i32 0, i32* [[I]], align 4, !dbg [[DBG48:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]], !dbg [[DBG46]]
|
|
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG42]]
|
|
// CHECK1: omp.precond.then:
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39]]
|
|
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG50:![0-9]+]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39]]
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39]]
|
|
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39]]
|
|
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I3]], metadata [[META47]], metadata !DIExpression()), !dbg [[DBG39]]
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG42]]
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG42]]
|
|
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG42]]
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]], !dbg [[DBG50]]
|
|
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG50]]
|
|
// CHECK1: cond.true:
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG50]]
|
|
// CHECK1: cond.false:
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG50]]
|
|
// CHECK1: cond.end:
|
|
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ], !dbg [[DBG50]]
|
|
// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG42]]
|
|
// CHECK1: omp.inner.for.cond:
|
|
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]], !dbg [[DBG46]]
|
|
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG42]]
|
|
// CHECK1: omp.inner.for.body:
|
|
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1, !dbg [[DBG48]]
|
|
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG48]]
|
|
// CHECK1-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !dbg [[DBG48]]
|
|
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !dbg [[DBG54:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !dbg [[DBG56:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG57:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]], !dbg [[DBG57]]
|
|
// CHECK1-NEXT: store i32 [[TMP21]], i32* [[ARRAYIDX]], align 4, !dbg [[DBG58:![0-9]+]]
|
|
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG59:![0-9]+]]
|
|
// CHECK1: omp.body.continue:
|
|
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG42]]
|
|
// CHECK1: omp.inner.for.inc:
|
|
// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG50]]
|
|
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG42]], !llvm.loop [[LOOP60:![0-9]+]]
|
|
// CHECK1: omp.inner.for.end:
|
|
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG42]]
|
|
// CHECK1: omp.loop.exit:
|
|
// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG42]]
|
|
// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4, !dbg [[DBG42]]
|
|
// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP25]]), !dbg [[DBG61:![0-9]+]]
|
|
// CHECK1-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG42]]
|
|
// CHECK1: omp.precond.end:
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG62:![0-9]+]]
|
|
//
|