Summary: Memory globalization is required to maintain OpenMP standard semantics for data sharing between worker and master threads. The GPU cannot share data between its threads so must allocate global or shared memory to store the data in. Currently this is implemented fully in the frontend using the `__kmpc_data_sharing_push_stack` and __kmpc_data_sharing_pop_stack` functions to emulate standard CPU stack sharing. The front-end scans the target region for variables that escape the region and must be shared between the threads. Each variable then has a field created for it in a global record type. This patch replaces this functinality with a single allocation command, effectively mimicing an alloca instruction for the variables that must be shared between the threads. This will be much slower than the current solution, but makes it much easier to optimize as we can analyze each variable independently and determine if it is not captured. In the future, we can replace these calls with an `alloca` and small allocations can be pushed to shared memory. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D97680
725 lines
69 KiB
C++
725 lines
69 KiB
C++
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
|
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=45
|
|
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=45 | FileCheck %s --check-prefix=CHECK1
|
|
// expected-no-diagnostics
|
|
|
|
template <unsigned *ddd>
|
|
struct S {
|
|
static int a;
|
|
};
|
|
|
|
extern unsigned aaa;
|
|
template<> int S<&aaa>::a;
|
|
|
|
template struct S<&aaa>;
|
|
|
|
int main() {
|
|
/* int(*b)[a]; */
|
|
/* int *(**c)[a]; */
|
|
bool bb;
|
|
int a;
|
|
int b[10][10];
|
|
int c[10][10][10];
|
|
#pragma omp target parallel firstprivate(a, b) map(tofrom \
|
|
: c) map(tofrom \
|
|
: bb) if (target:a)
|
|
{
|
|
int &f = c[1][1][1];
|
|
int &g = a;
|
|
int &h = b[1][1];
|
|
int d = 15;
|
|
a = 5;
|
|
b[0][a] = 10;
|
|
c[0][0][a] = 11;
|
|
b[0][a] = c[0][0][a];
|
|
bb |= b[0][a];
|
|
}
|
|
#pragma omp target parallel firstprivate(a) map(tofrom \
|
|
: c, b) map(to \
|
|
: bb)
|
|
{
|
|
int &f = c[1][1][1];
|
|
int &g = a;
|
|
int &h = b[1][1];
|
|
int d = 15;
|
|
a = 5;
|
|
b[0][a] = 10;
|
|
c[0][0][a] = 11;
|
|
b[0][a] = c[0][0][a];
|
|
d = bb;
|
|
}
|
|
#pragma omp target parallel map(tofrom \
|
|
: a, c, b) map(from \
|
|
: bb)
|
|
{
|
|
int &f = c[1][1][1];
|
|
int &g = a;
|
|
int &h = b[1][1];
|
|
int d = 15;
|
|
a = 5;
|
|
b[0][a] = 10;
|
|
c[0][0][a] = 11;
|
|
b[0][a] = c[0][0][a];
|
|
bb = b[0][a];
|
|
}
|
|
return 0;
|
|
}
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__
|
|
// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]]* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG22:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG38:![0-9]+]]
|
|
// CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG42:![0-9]+]]
|
|
// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG44:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG45:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP3]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast i8 addrspace(1)* [[TMP5]] to i8*, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: store i8* [[TMP6]], i8** [[_TMP2]], align 8, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG45]]
|
|
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !dbg [[DBG45]]
|
|
// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1), !dbg [[DBG45]]
|
|
// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]], !dbg [[DBG45]]
|
|
// CHECK1: .execute:
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG46:![0-9]+]]
|
|
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: store i32 [[TMP9]], i32* [[CONV]], align 4, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [10 x [10 x [10 x i32]]]* [[TMP2]] to i8*, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8*, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP4]] to i8*, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP17]], align 8, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG46]]
|
|
// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP18]], i64 4), !dbg [[DBG46]]
|
|
// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]], !dbg [[DBG47:![0-9]+]]
|
|
// CHECK1: .omp.deinit:
|
|
// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1), !dbg [[DBG47]]
|
|
// CHECK1-NEXT: br label [[DOTEXIT:%.*]], !dbg [[DBG47]]
|
|
// CHECK1: .exit:
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG49:![0-9]+]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__
|
|
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]]* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG50:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[B3:%.*]] = alloca [10 x [10 x i32]], align 4
|
|
// CHECK1-NEXT: [[F:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[G:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[H:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META57:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META60:![0-9]+]], metadata !DIExpression()), !dbg [[DBG61:![0-9]+]]
|
|
// CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG63:![0-9]+]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65:![0-9]+]]
|
|
// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG68:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP3]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast i8 addrspace(1)* [[TMP5]] to i8*, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: store i8* [[TMP6]], i8** [[_TMP2]], align 8, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]* [[B3]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [10 x [10 x i32]]* [[B3]] to i8*, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x [10 x i32]]* [[TMP4]] to i8*, !dbg [[DBG68]]
|
|
// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 400, i1 false), !dbg [[DBG68]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG74:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG74]]
|
|
// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG74]]
|
|
// CHECK1-NEXT: store i32* [[ARRAYIDX5]], i32** [[F]], align 8, !dbg [[DBG73]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[G]], align 8, !dbg [[DBG76]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B3]], i64 0, i64 1, !dbg [[DBG79:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG79]]
|
|
// CHECK1-NEXT: store i32* [[ARRAYIDX7]], i32** [[H]], align 8, !dbg [[DBG78]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81:![0-9]+]]
|
|
// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG81]]
|
|
// CHECK1-NEXT: store i32 5, i32* [[A_ADDR]], align 4, !dbg [[DBG82:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B3]], i64 0, i64 0, !dbg [[DBG83:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG84:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG83]]
|
|
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG83]]
|
|
// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX9]], align 4, !dbg [[DBG85:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG86:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG86]]
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG87:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG86]]
|
|
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG86]]
|
|
// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX13]], align 4, !dbg [[DBG88:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG89]]
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG90:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG89]]
|
|
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG89]]
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX17]], align 4, !dbg [[DBG89]]
|
|
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B3]], i64 0, i64 0, !dbg [[DBG91:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG92:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG91]]
|
|
// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG91]]
|
|
// CHECK1-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX20]], align 4, !dbg [[DBG93:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B3]], i64 0, i64 0, !dbg [[DBG94:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG95:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP15]] to i64, !dbg [[DBG94]]
|
|
// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG94]]
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX23]], align 4, !dbg [[DBG94]]
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP7]], align 1, !dbg [[DBG96:![0-9]+]]
|
|
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG96]]
|
|
// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG96]]
|
|
// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP16]], !dbg [[DBG96]]
|
|
// CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG96]]
|
|
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL24]] to i8, !dbg [[DBG96]]
|
|
// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP7]], align 1, !dbg [[DBG96]]
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG97:![0-9]+]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__
|
|
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG98:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META105:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]]
|
|
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]]
|
|
// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG112:![0-9]+]]
|
|
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG112]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG112]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG112]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG112]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG112]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG112]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG112]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG112]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG112]]
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG112]]
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8* [[TMP8]] to i8 addrspace(1)*, !dbg [[DBG112]]
|
|
// CHECK1-NEXT: call void @__omp_outlined___debug__(i32* [[TMP3]], i32* [[TMP4]], [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP9]], i32 [[TMP6]], [10 x [10 x i32]]* [[TMP7]], i8 addrspace(1)* [[TMP10]]) #[[ATTR4:[0-9]+]], !dbg [[DBG112]]
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG112]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
|
|
// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG113:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117:![0-9]+]]
|
|
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117]]
|
|
// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG121:![0-9]+]]
|
|
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG121]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG121]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG121]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG121]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG121]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG121]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG121]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG121]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG121]]
|
|
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP7]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP5]], i8 addrspace(1)* [[TMP8]]) #[[ATTR4]], !dbg [[DBG121]]
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG121]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__
|
|
// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG122:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]] addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]]
|
|
// CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]]
|
|
// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG135:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG135]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG135]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG135]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG135]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP3]] to [10 x [10 x i32]]*, !dbg [[DBG135]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG135]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG135]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG135]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG135]]
|
|
// CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG135]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG135]]
|
|
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !dbg [[DBG135]]
|
|
// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1), !dbg [[DBG135]]
|
|
// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]], !dbg [[DBG135]]
|
|
// CHECK1: .execute:
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG136:![0-9]+]]
|
|
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV]], align 4, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [10 x [10 x [10 x i32]]]* [[TMP2]] to i8*, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP11]] to i8*, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [10 x [10 x i32]]* [[TMP5]] to i8*, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP18]], align 8, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG136]]
|
|
// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP19]], i64 4), !dbg [[DBG136]]
|
|
// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]], !dbg [[DBG137:![0-9]+]]
|
|
// CHECK1: .omp.deinit:
|
|
// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1), !dbg [[DBG137]]
|
|
// CHECK1-NEXT: br label [[DOTEXIT:%.*]], !dbg [[DBG137]]
|
|
// CHECK1: .exit:
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG139:![0-9]+]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__1
|
|
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG140:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]] addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[F:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[G:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[H:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG144:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG144]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]]
|
|
// CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151:![0-9]+]]
|
|
// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG153:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG154:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG154]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG154]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG154]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG154]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP3]] to [10 x [10 x i32]]*, !dbg [[DBG154]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG154]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG154]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG154]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG154]]
|
|
// CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG154]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG154]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG157:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG158:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG158]]
|
|
// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG158]]
|
|
// CHECK1-NEXT: store i32* [[ARRAYIDX4]], i32** [[F]], align 8, !dbg [[DBG157]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[G]], align 8, !dbg [[DBG160]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP5]], i64 0, i64 1, !dbg [[DBG163:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG163]]
|
|
// CHECK1-NEXT: store i32* [[ARRAYIDX6]], i32** [[H]], align 8, !dbg [[DBG162]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META164:![0-9]+]], metadata !DIExpression()), !dbg [[DBG165:![0-9]+]]
|
|
// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG165]]
|
|
// CHECK1-NEXT: store i32 5, i32* [[A_ADDR]], align 4, !dbg [[DBG166:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP5]], i64 0, i64 0, !dbg [[DBG167:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG168:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG167]]
|
|
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG167]]
|
|
// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX8]], align 4, !dbg [[DBG169:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG170:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG170]]
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG171:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG170]]
|
|
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG170]]
|
|
// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX12]], align 4, !dbg [[DBG172:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG173:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG173]]
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG174:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG173]]
|
|
// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG173]]
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4, !dbg [[DBG173]]
|
|
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP5]], i64 0, i64 0, !dbg [[DBG175:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG176:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG175]]
|
|
// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG175]]
|
|
// CHECK1-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX19]], align 4, !dbg [[DBG177:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP8]], align 1, !dbg [[DBG178:![0-9]+]]
|
|
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1, !dbg [[DBG178]]
|
|
// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG178]]
|
|
// CHECK1-NEXT: store i32 [[CONV]], i32* [[D]], align 4, !dbg [[DBG179:![0-9]+]]
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG180:![0-9]+]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2
|
|
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG181:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META182:![0-9]+]], metadata !DIExpression()), !dbg [[DBG183:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG183]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META185:![0-9]+]], metadata !DIExpression()), !dbg [[DBG183]]
|
|
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META186:![0-9]+]], metadata !DIExpression()), !dbg [[DBG183]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META187:![0-9]+]], metadata !DIExpression()), !dbg [[DBG183]]
|
|
// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG183]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG189:![0-9]+]]
|
|
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP7]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast i8* [[TMP8]] to i8 addrspace(1)*, !dbg [[DBG189]]
|
|
// CHECK1-NEXT: call void @__omp_outlined___debug__1(i32* [[TMP3]], i32* [[TMP4]], [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP9]], i32 [[TMP6]], [10 x [10 x i32]] addrspace(1)* [[TMP10]], i8 addrspace(1)* [[TMP11]]) #[[ATTR4]], !dbg [[DBG189]]
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG189]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37
|
|
// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG190:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META191:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192:![0-9]+]]
|
|
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META193:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META194:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192]]
|
|
// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META195:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG196:![0-9]+]]
|
|
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG196]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG196]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG196]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG196]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG196]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG196]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG196]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG196]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP5]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG196]]
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG196]]
|
|
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP7]], i32 [[TMP4]], [10 x [10 x i32]] addrspace(1)* [[TMP8]], i8 addrspace(1)* [[TMP9]]) #[[ATTR4]], !dbg [[DBG196]]
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG196]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__
|
|
// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 addrspace(1)* noalias [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG197:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32 addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]] addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG203:![0-9]+]]
|
|
// CHECK1-NEXT: store i32 addrspace(1)* [[A]], i32 addrspace(1)** [[A_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32 addrspace(1)** [[A_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]]
|
|
// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG210:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)** [[A_ADDR]], align 8, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(1)* [[TMP3]] to i32*, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: store i32* [[TMP4]], i32** [[_TMP1]], align 8, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP6]] to [10 x [10 x i32]]*, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP7]], [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG210]]
|
|
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !dbg [[DBG210]]
|
|
// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1), !dbg [[DBG210]]
|
|
// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]], !dbg [[DBG210]]
|
|
// CHECK1: .execute:
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]])
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG211:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [10 x [10 x [10 x i32]]]* [[TMP2]] to i8*, !dbg [[DBG211]]
|
|
// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !dbg [[DBG211]]
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG211]]
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP5]] to i8*, !dbg [[DBG211]]
|
|
// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !dbg [[DBG211]]
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG211]]
|
|
// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP8]] to i8*, !dbg [[DBG211]]
|
|
// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8, !dbg [[DBG211]]
|
|
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG211]]
|
|
// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP19]], align 8, !dbg [[DBG211]]
|
|
// CHECK1-NEXT: [[TMP20:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG211]]
|
|
// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB5]], i32 [[TMP12]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i32*, [10 x [10 x i32]]*, i8*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP20]], i64 4), !dbg [[DBG211]]
|
|
// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]], !dbg [[DBG212:![0-9]+]]
|
|
// CHECK1: .omp.deinit:
|
|
// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1), !dbg [[DBG212]]
|
|
// CHECK1-NEXT: br label [[DOTEXIT:%.*]], !dbg [[DBG212]]
|
|
// CHECK1: .exit:
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG214:![0-9]+]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__3
|
|
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 addrspace(1)* noalias [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG215:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32 addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]] addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8
|
|
// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: [[F:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[G:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[H:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META218:![0-9]+]], metadata !DIExpression()), !dbg [[DBG219:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META220:![0-9]+]], metadata !DIExpression()), !dbg [[DBG219]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META221:![0-9]+]], metadata !DIExpression()), !dbg [[DBG222:![0-9]+]]
|
|
// CHECK1-NEXT: store i32 addrspace(1)* [[A]], i32 addrspace(1)** [[A_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32 addrspace(1)** [[A_ADDR]], metadata [[META223:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224:![0-9]+]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META225:![0-9]+]], metadata !DIExpression()), !dbg [[DBG226:![0-9]+]]
|
|
// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META227:![0-9]+]], metadata !DIExpression()), !dbg [[DBG228:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG229:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)** [[A_ADDR]], align 8, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(1)* [[TMP3]] to i32*, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: store i32* [[TMP4]], i32** [[_TMP1]], align 8, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP6]] to [10 x [10 x i32]]*, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP7]], [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG229]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG232:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG233:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG233]]
|
|
// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG233]]
|
|
// CHECK1-NEXT: store i32* [[ARRAYIDX5]], i32** [[F]], align 8, !dbg [[DBG232]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[TMP5]], i32** [[G]], align 8, !dbg [[DBG235]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META236:![0-9]+]], metadata !DIExpression()), !dbg [[DBG237:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 1, !dbg [[DBG238:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG238]]
|
|
// CHECK1-NEXT: store i32* [[ARRAYIDX7]], i32** [[H]], align 8, !dbg [[DBG237]]
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG240:![0-9]+]]
|
|
// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG240]]
|
|
// CHECK1-NEXT: store i32 5, i32* [[TMP5]], align 4, !dbg [[DBG241:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 0, !dbg [[DBG242:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG243:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG242]]
|
|
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG242]]
|
|
// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX9]], align 4, !dbg [[DBG244:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG245:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG245]]
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG246:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG245]]
|
|
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG245]]
|
|
// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX13]], align 4, !dbg [[DBG247:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG248:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG248]]
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG249:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG248]]
|
|
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG248]]
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX17]], align 4, !dbg [[DBG248]]
|
|
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 0, !dbg [[DBG250:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG251:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG250]]
|
|
// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG250]]
|
|
// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX20]], align 4, !dbg [[DBG252:![0-9]+]]
|
|
// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 0, !dbg [[DBG253:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG254:![0-9]+]]
|
|
// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP17]] to i64, !dbg [[DBG253]]
|
|
// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG253]]
|
|
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX23]], align 4, !dbg [[DBG253]]
|
|
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP18]], 0, !dbg [[DBG253]]
|
|
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG255:![0-9]+]]
|
|
// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP11]], align 1, !dbg [[DBG255]]
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG256:![0-9]+]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4
|
|
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG257:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261]]
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META263:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261]]
|
|
// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META264:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261]]
|
|
// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG267:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP6]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast i32* [[TMP7]] to i32 addrspace(1)*, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP8]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast i8* [[TMP9]] to i8 addrspace(1)*, !dbg [[DBG267]]
|
|
// CHECK1-NEXT: call void @__omp_outlined___debug__3(i32* [[TMP4]], i32* [[TMP5]], [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP10]], i32 addrspace(1)* [[TMP11]], [10 x [10 x i32]] addrspace(1)* [[TMP12]], i8 addrspace(1)* [[TMP13]]) #[[ATTR4]], !dbg [[DBG267]]
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG267]]
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51
|
|
// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG268:![0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8
|
|
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8
|
|
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8
|
|
// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META271:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]]
|
|
// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META273:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272]]
|
|
// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272]]
|
|
// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272]]
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG276:![0-9]+]]
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG276]]
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG276]]
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG276]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG276]]
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG276]]
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG276]]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG276]]
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP4]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG276]]
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i32* [[TMP5]] to i32 addrspace(1)*, !dbg [[DBG276]]
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP6]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG276]]
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast i8* [[TMP7]] to i8 addrspace(1)*, !dbg [[DBG276]]
|
|
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP8]], i32 addrspace(1)* [[TMP9]], [10 x [10 x i32]] addrspace(1)* [[TMP10]], i8 addrspace(1)* [[TMP11]]) #[[ATTR4]], !dbg [[DBG276]]
|
|
// CHECK1-NEXT: ret void, !dbg [[DBG276]]
|
|
//
|