Summary: We use this `dyn_ptr` argument in Clang/OpenMP to handle the `KernelLaunchEnvironment`. This is a per-kernel argument used to share some information. Currently, it's prepended to the argument list and we generate storage for it in the runtime. This is bad for a few reasons: 1. It changes the ABI by shifting user arguments. 2. It cannot trivially be left uninitialized if unused. 3. The runtime must allocate its own memory for it. This PR changes it to be appended instead. Additionally, space for this is always emitted. This means the OMPIRBuilder itself will provide the storage; we simply need to populate it in the runtime if it is used. This means that if it's unused we don't always pay the cost and it's easier for non-OpenMP users to ignore it. Backward compatibility is maintained by auto-upgrading the kernel arguments. In `libomptarget` we completely allocate a new buffer to store this in the new format. The plugins still need to respect the old ABI of the called device object, so we simply rotate it if it's the old version.
2149 lines
147 KiB
C++
2149 lines
147 KiB
C++
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5
|
|
// expected-no-diagnostics
|
|
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -fopenmp-version=60 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK-64
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -fopenmp -fopenmp-version=60 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -fopenmp -fopenmp-version=60 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK-64
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK-32
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -fopenmp -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -fopenmp -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK-32
|
|
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY-64 %s
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -fopenmp-simd -fopenmp-version=60 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -fopenmp-simd -fopenmp-version=60 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY-64 %s
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY-32 %s
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -fopenmp-simd -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
|
|
// RUN: %clang_cc1 -no-enable-noundef-analysis -fopenmp-simd -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY-32 %s
|
|
|
|
#ifndef HEADER
|
|
#define HEADER
|
|
void foo1(int a) { // Codegen input: target region with 'default(private: scalar)' referencing a double local.
|
|
double d = (double)a; // scalar local initialized from the parameter; referenced inside the target region
|
|
|
|
#pragma omp target default(private: scalar)
|
|
{
|
|
d += 1.0; // CK-64 checks for the outlined kernel perform this add on a separate 'alloca double' (D1), not on the incoming i64 argument
|
|
}
|
|
} // NOTE(review): CHECK lines name this kernel "..._Z4foo1i_l23"; the l23 suffix is presumably source-line derived, so avoid inserting lines above - confirm
|
|
|
|
void foo2() { // Codegen input: target region with 'default(private: aggregate)' referencing a local array.
|
|
int pvtArr[10]; // aggregate local referenced inside the target region
|
|
|
|
#pragma omp target default(private: aggregate)
|
|
{
|
|
pvtArr[5]++; // CK-64 checks for the outlined kernel index a separate 'alloca [10 x i32]' (PVTARR1), not the pointer argument
|
|
}
|
|
} // NOTE(review): CHECK lines name this kernel "..._Z4foo2v_l32"; the l32 suffix is presumably source-line derived - confirm before adding lines
|
|
|
|
void foo3() { // Codegen input: target region with 'default(private: pointer)' referencing a pointer local.
|
|
int *pa; // pointer local, left uninitialized; only the generated IR is checked
|
|
|
|
#pragma omp target default(private: pointer)
|
|
{
|
|
pa[50]++; // CK-64 checks for the outlined kernel load the pointer from a separate 'alloca ptr' (PA1), not from the argument slot
|
|
}
|
|
} // NOTE(review): CHECK lines name this kernel "..._Z4foo3v_l41"; the l41 suffix is presumably source-line derived - confirm before adding lines
|
|
|
|
// Specified variable-category doesn't apply to referenced variable, so
|
|
// normal implicitly determined data-sharing applies.
|
|
void foo4() { // Codegen input: 'default(private: pointer)' with a non-pointer variable referenced in the region.
|
|
int p; // an int, so the 'pointer' variable-category in the clause below does not cover it
|
|
|
|
#pragma omp target default(private: pointer)
|
|
{
|
|
p++; // CK-64 checks for the outlined kernel increment the by-value argument slot (P_ADDR); no extra private alloca is expected
|
|
}
|
|
} // NOTE(review): CHECK lines name this kernel "..._Z4foo4v_l52"; the l52 suffix is presumably source-line derived - confirm before adding lines
|
|
|
|
// Verify default clause with variable-category 'all' is equivalent to no
|
|
// variable-category. IR checks generated with 'all' but test runs without
|
|
// variable-category.
|
|
void foo5(int a) { // Codegen input: 'default(private)' with no variable-category, over a scalar, an array and a pointer.
|
|
double d = (double)a; // scalar local initialized from the parameter
|
|
int pvtArr[10]; // aggregate local
|
|
int *pa; // pointer local, left uninitialized; only the generated IR is checked
|
|
|
|
#pragma omp target default(private)
|
|
{
|
|
d += 1.0; // references the scalar
|
|
pvtArr[5]++; // references the aggregate
|
|
pa[50]++; // references the pointer
|
|
}
|
|
}
|
|
|
|
// Verify default clause with 'shared' DSA is ignored. This makes it
|
|
// equivalent to target with no default clause. IR checks generated with
|
|
// no default clause but test runs with default 'shared'.
|
|
void foo6(int a) { // Codegen input: 'default(shared)' on a target region, over a scalar, an array and a pointer.
|
|
double d = (double)a; // scalar local initialized from the parameter
|
|
int pvtArr[10]; // aggregate local
|
|
int *pa; // pointer local, left uninitialized; only the generated IR is checked
|
|
|
|
#pragma omp target default(shared)
|
|
{
|
|
d += 1.0; // references the scalar
|
|
pvtArr[5]++; // references the aggregate
|
|
pa[50]++; // references the pointer
|
|
}
|
|
}
|
|
|
|
// Verify default clause with 'firstprivate' DSA is equivalent to specifying
|
|
// defaultmap with 'firstprivate'. IR checks generated with
|
|
// defaultmap(firstprivate) but test runs with default(firstprivate).
|
|
void foo7(int a) { // Codegen input: 'default(firstprivate)' on a target region, over a scalar, an array and a pointer.
|
|
double d = (double)a; // scalar local initialized from the parameter
|
|
int pvtArr[10]; // aggregate local
|
|
int *pa; // pointer local, left uninitialized; only the generated IR is checked
|
|
|
|
#pragma omp target default(firstprivate)
|
|
{
|
|
d += 1.0; // references the scalar
|
|
pvtArr[5]++; // references the aggregate
|
|
pa[50]++; // references the pointer
|
|
}
|
|
}
|
|
|
|
// Verify 'default' clause on a combined 'target' directive is equivalent to
|
|
// specifying its constituent directives with 'default' clauses. IR checks
|
|
// generated with constituent directives but test runs with combined
|
|
// directive.
|
|
void foo8() { // Codegen input: 'default(firstprivate)' on the combined 'target teams distribute parallel for' directive.
|
|
int x = 0; // also named in the explicit firstprivate(x) clause on the directive below
|
|
#pragma omp target teams distribute parallel for default(firstprivate) firstprivate(x)
|
|
for (int i=0; i<10; i++) // ten-iteration loop associated with the combined directive
|
|
x += 1; // loop body: adds 1 to x
|
|
}
|
|
#endif // HEADER
|
|
// CK-64-LABEL: define dso_local void @_Z4foo1i(
|
|
// CK-64-SAME: i32 signext [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// CK-64-NEXT: [[D_CASTED:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-64-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// CK-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// CK-64-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load double, ptr [[D]], align 8
|
|
// CK-64-NEXT: store double [[TMP1]], ptr [[D_CASTED]], align 8
|
|
// CK-64-NEXT: [[TMP2:%.*]] = load i64, ptr [[D_CASTED]], align 8
|
|
// CK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8
|
|
// CK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8
|
|
// CK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP5]], align 8
|
|
// CK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP6]], align 8
|
|
// CK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP7]], align 8
|
|
// CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP8]], align 8
|
|
// CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i32 4, ptr [[TMP11]], align 4
|
|
// CK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-64-NEXT: store i32 2, ptr [[TMP12]], align 4
|
|
// CK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8
|
|
// CK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8
|
|
// CK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-64-NEXT: store ptr @.offload_sizes, ptr [[TMP15]], align 8
|
|
// CK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-64-NEXT: store ptr @.offload_maptypes, ptr [[TMP16]], align 8
|
|
// CK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP17]], align 8
|
|
// CK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP18]], align 8
|
|
// CK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP19]], align 8
|
|
// CK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP20]], align 8
|
|
// CK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP21]], align 4
|
|
// CK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP22]], align 4
|
|
// CK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-64-NEXT: store i32 0, ptr [[TMP23]], align 4
|
|
// CK-64-NEXT: [[TMP24:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1i_l23.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
|
|
// CK-64-NEXT: br i1 [[TMP25]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-64: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1i_l23(i64 [[TMP2]], ptr null) #[[ATTR2:[0-9]+]]
|
|
// CK-64-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-64: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1i_l23(
|
|
// CK-64-SAME: i64 [[D:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[D1:%.*]] = alloca double, align 8
|
|
// CK-64-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load double, ptr [[D1]], align 8
|
|
// CK-64-NEXT: [[ADD:%.*]] = fadd double [[TMP0]], 1.000000e+00
|
|
// CK-64-NEXT: store double [[ADD]], ptr [[D1]], align 8
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define dso_local void @_Z4foo2v(
|
|
// CK-64-SAME: ) #[[ATTR0]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// CK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-64-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[TMP0]], align 8
|
|
// CK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[TMP1]], align 8
|
|
// CK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP2]], align 8
|
|
// CK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP3]], align 8
|
|
// CK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP4]], align 8
|
|
// CK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP5]], align 8
|
|
// CK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i32 4, ptr [[TMP8]], align 4
|
|
// CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-64-NEXT: store i32 2, ptr [[TMP9]], align 4
|
|
// CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
|
|
// CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
|
|
// CK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-64-NEXT: store ptr @.offload_sizes.1, ptr [[TMP12]], align 8
|
|
// CK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-64-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP13]], align 8
|
|
// CK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP14]], align 8
|
|
// CK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP15]], align 8
|
|
// CK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP16]], align 8
|
|
// CK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP17]], align 8
|
|
// CK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4
|
|
// CK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4
|
|
// CK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-64-NEXT: store i32 0, ptr [[TMP20]], align 4
|
|
// CK-64-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l32.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-64-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
|
|
// CK-64-NEXT: br i1 [[TMP22]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-64: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l32(ptr [[PVTARR]], ptr null) #[[ATTR2]]
|
|
// CK-64-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-64: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l32(
|
|
// CK-64-SAME: ptr nonnull align 4 dereferenceable(40) [[PVTARR:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[PVTARR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[PVTARR1:%.*]] = alloca [10 x i32], align 4
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[PVTARR_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVTARR_ADDR]], align 8, !nonnull [[META18:![0-9]+]], !align [[META19:![0-9]+]]
|
|
// CK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR1]], i64 0, i64 5
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CK-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
|
|
// CK-64-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define dso_local void @_Z4foo3v(
|
|
// CK-64-SAME: ) #[[ATTR0]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[PA:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PA]], align 8
|
|
// CK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8
|
|
// CK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8
|
|
// CK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP3]], align 8
|
|
// CK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP4]], align 8
|
|
// CK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP5]], align 8
|
|
// CK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP6]], align 8
|
|
// CK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i32 4, ptr [[TMP9]], align 4
|
|
// CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-64-NEXT: store i32 2, ptr [[TMP10]], align 4
|
|
// CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
|
|
// CK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8
|
|
// CK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-64-NEXT: store ptr @.offload_sizes.3, ptr [[TMP13]], align 8
|
|
// CK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-64-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP14]], align 8
|
|
// CK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP15]], align 8
|
|
// CK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP16]], align 8
|
|
// CK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP17]], align 8
|
|
// CK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP18]], align 8
|
|
// CK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP19]], align 4
|
|
// CK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4
|
|
// CK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-64-NEXT: store i32 0, ptr [[TMP21]], align 4
|
|
// CK-64-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l41.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-64-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
|
|
// CK-64-NEXT: br i1 [[TMP23]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-64: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l41(ptr [[TMP0]], ptr null) #[[ATTR2]]
|
|
// CK-64-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-64: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l41(
|
|
// CK-64-SAME: ptr [[PA:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[PA_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[PA1:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: store ptr [[PA]], ptr [[PA_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PA1]], align 8
|
|
// CK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 50
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CK-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
|
|
// CK-64-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define dso_local void @_Z4foo4v(
|
|
// CK-64-SAME: ) #[[ATTR0]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[P:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[P_CASTED:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
|
|
// CK-64-NEXT: store i32 [[TMP0]], ptr [[P_CASTED]], align 4
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[P_CASTED]], align 8
|
|
// CK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8
|
|
// CK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8
|
|
// CK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP4]], align 8
|
|
// CK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP5]], align 8
|
|
// CK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP6]], align 8
|
|
// CK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP7]], align 8
|
|
// CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i32 4, ptr [[TMP10]], align 4
|
|
// CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-64-NEXT: store i32 2, ptr [[TMP11]], align 4
|
|
// CK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8
|
|
// CK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8
|
|
// CK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-64-NEXT: store ptr @.offload_sizes.5, ptr [[TMP14]], align 8
|
|
// CK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-64-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP15]], align 8
|
|
// CK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP16]], align 8
|
|
// CK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP17]], align 8
|
|
// CK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP18]], align 8
|
|
// CK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP19]], align 8
|
|
// CK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
|
|
// CK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
|
|
// CK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-64-NEXT: store i32 0, ptr [[TMP22]], align 4
|
|
// CK-64-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l52.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
|
|
// CK-64-NEXT: br i1 [[TMP24]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-64: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l52(i64 [[TMP1]], ptr null) #[[ATTR2]]
|
|
// CK-64-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-64: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l52(
|
|
// CK-64-SAME: i64 [[P:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[P_ADDR:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: store i64 [[P]], ptr [[P_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[P_ADDR]], align 4
|
|
// CK-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
|
|
// CK-64-NEXT: store i32 [[INC]], ptr [[P_ADDR]], align 4
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define dso_local void @_Z4foo5i(
|
|
// CK-64-SAME: i32 signext [[A:%.*]]) #[[ATTR0]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// CK-64-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// CK-64-NEXT: [[PA:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[D_CASTED:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
|
|
// CK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-64-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// CK-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// CK-64-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load double, ptr [[D]], align 8
|
|
// CK-64-NEXT: store double [[TMP1]], ptr [[D_CASTED]], align 8
|
|
// CK-64-NEXT: [[TMP2:%.*]] = load i64, ptr [[D_CASTED]], align 8
|
|
// CK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA]], align 8
|
|
// CK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8
|
|
// CK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8
|
|
// CK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP6]], align 8
|
|
// CK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[TMP7]], align 8
|
|
// CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[TMP8]], align 8
|
|
// CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP9]], align 8
|
|
// CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP3]], ptr [[TMP10]], align 8
|
|
// CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8
|
|
// CK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP12]], align 8
|
|
// CK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP13]], align 8
|
|
// CK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP14]], align 8
|
|
// CK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP15]], align 8
|
|
// CK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i32 4, ptr [[TMP18]], align 4
|
|
// CK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-64-NEXT: store i32 4, ptr [[TMP19]], align 4
|
|
// CK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8
|
|
// CK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8
|
|
// CK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-64-NEXT: store ptr @.offload_sizes.7, ptr [[TMP22]], align 8
|
|
// CK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-64-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP23]], align 8
|
|
// CK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP24]], align 8
|
|
// CK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP25]], align 8
|
|
// CK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP26]], align 8
|
|
// CK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP27]], align 8
|
|
// CK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP28]], align 4
|
|
// CK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP29]], align 4
|
|
// CK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-64-NEXT: store i32 0, ptr [[TMP30]], align 4
|
|
// CK-64-NEXT: [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo5i_l66.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-64-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
|
|
// CK-64-NEXT: br i1 [[TMP32]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-64: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo5i_l66(i64 [[TMP2]], ptr [[PVTARR]], ptr [[TMP3]], ptr null) #[[ATTR2]]
|
|
// CK-64-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-64: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo5i_l66(
|
|
// CK-64-SAME: i64 [[D:%.*]], ptr nonnull align 4 dereferenceable(40) [[PVTARR:%.*]], ptr [[PA:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[PVTARR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[PA_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[D1:%.*]] = alloca double, align 8
|
|
// CK-64-NEXT: [[PVTARR2:%.*]] = alloca [10 x i32], align 4
|
|
// CK-64-NEXT: [[PA3:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[PVTARR_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[PA]], ptr [[PA_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVTARR_ADDR]], align 8, !nonnull [[META18]], !align [[META19]]
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load double, ptr [[D1]], align 8
|
|
// CK-64-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// CK-64-NEXT: store double [[ADD]], ptr [[D1]], align 8
|
|
// CK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR2]], i64 0, i64 5
|
|
// CK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CK-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// CK-64-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// CK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA3]], align 8
|
|
// CK-64-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 50
|
|
// CK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
|
|
// CK-64-NEXT: [[INC5:%.*]] = add nsw i32 [[TMP4]], 1
|
|
// CK-64-NEXT: store i32 [[INC5]], ptr [[ARRAYIDX4]], align 4
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define dso_local void @_Z4foo6i(
|
|
// CK-64-SAME: i32 signext [[A:%.*]]) #[[ATTR0]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// CK-64-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// CK-64-NEXT: [[PA:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[D_CASTED:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
|
|
// CK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-64-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// CK-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// CK-64-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load double, ptr [[D]], align 8
|
|
// CK-64-NEXT: store double [[TMP1]], ptr [[D_CASTED]], align 8
|
|
// CK-64-NEXT: [[TMP2:%.*]] = load i64, ptr [[D_CASTED]], align 8
|
|
// CK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA]], align 8
|
|
// CK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8
|
|
// CK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8
|
|
// CK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP6]], align 8
|
|
// CK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[TMP7]], align 8
|
|
// CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[TMP8]], align 8
|
|
// CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP9]], align 8
|
|
// CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP3]], ptr [[TMP10]], align 8
|
|
// CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8
|
|
// CK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP12]], align 8
|
|
// CK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP13]], align 8
|
|
// CK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP14]], align 8
|
|
// CK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP15]], align 8
|
|
// CK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i32 4, ptr [[TMP18]], align 4
|
|
// CK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-64-NEXT: store i32 4, ptr [[TMP19]], align 4
|
|
// CK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8
|
|
// CK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8
|
|
// CK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-64-NEXT: store ptr @.offload_sizes.9, ptr [[TMP22]], align 8
|
|
// CK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-64-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP23]], align 8
|
|
// CK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP24]], align 8
|
|
// CK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP25]], align 8
|
|
// CK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP26]], align 8
|
|
// CK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP27]], align 8
|
|
// CK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP28]], align 4
|
|
// CK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP29]], align 4
|
|
// CK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-64-NEXT: store i32 0, ptr [[TMP30]], align 4
|
|
// CK-64-NEXT: [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo6i_l82.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-64-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
|
|
// CK-64-NEXT: br i1 [[TMP32]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-64: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo6i_l82(i64 [[TMP2]], ptr [[PVTARR]], ptr [[TMP3]], ptr null) #[[ATTR2]]
|
|
// CK-64-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-64: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo6i_l82(
|
|
// CK-64-SAME: i64 [[D:%.*]], ptr nonnull align 4 dereferenceable(40) [[PVTARR:%.*]], ptr [[PA:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[PVTARR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[PA_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[PVTARR_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[PA]], ptr [[PA_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVTARR_ADDR]], align 8, !nonnull [[META18]], !align [[META19]]
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load double, ptr [[D_ADDR]], align 8
|
|
// CK-64-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// CK-64-NEXT: store double [[ADD]], ptr [[D_ADDR]], align 8
|
|
// CK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 5
|
|
// CK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CK-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// CK-64-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// CK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA_ADDR]], align 8
|
|
// CK-64-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 50
|
|
// CK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
|
|
// CK-64-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP4]], 1
|
|
// CK-64-NEXT: store i32 [[INC2]], ptr [[ARRAYIDX1]], align 4
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define dso_local void @_Z4foo7i(
|
|
// CK-64-SAME: i32 signext [[A:%.*]]) #[[ATTR0]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// CK-64-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// CK-64-NEXT: [[PA:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[D_CASTED:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
|
|
// CK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-64-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// CK-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// CK-64-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load double, ptr [[D]], align 8
|
|
// CK-64-NEXT: store double [[TMP1]], ptr [[D_CASTED]], align 8
|
|
// CK-64-NEXT: [[TMP2:%.*]] = load i64, ptr [[D_CASTED]], align 8
|
|
// CK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA]], align 8
|
|
// CK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8
|
|
// CK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8
|
|
// CK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP6]], align 8
|
|
// CK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[TMP7]], align 8
|
|
// CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[TMP8]], align 8
|
|
// CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP9]], align 8
|
|
// CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP3]], ptr [[TMP10]], align 8
|
|
// CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8
|
|
// CK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP12]], align 8
|
|
// CK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP13]], align 8
|
|
// CK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP14]], align 8
|
|
// CK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP15]], align 8
|
|
// CK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i32 4, ptr [[TMP18]], align 4
|
|
// CK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-64-NEXT: store i32 4, ptr [[TMP19]], align 4
|
|
// CK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8
|
|
// CK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8
|
|
// CK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-64-NEXT: store ptr @.offload_sizes.11, ptr [[TMP22]], align 8
|
|
// CK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-64-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP23]], align 8
|
|
// CK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP24]], align 8
|
|
// CK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP25]], align 8
|
|
// CK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP26]], align 8
|
|
// CK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP27]], align 8
|
|
// CK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP28]], align 4
|
|
// CK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP29]], align 4
|
|
// CK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-64-NEXT: store i32 0, ptr [[TMP30]], align 4
|
|
// CK-64-NEXT: [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo7i_l98.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-64-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
|
|
// CK-64-NEXT: br i1 [[TMP32]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-64: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo7i_l98(i64 [[TMP2]], ptr [[PVTARR]], ptr [[TMP3]], ptr null) #[[ATTR2]]
|
|
// CK-64-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-64: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo7i_l98(
|
|
// CK-64-SAME: i64 [[D:%.*]], ptr nonnull align 4 dereferenceable(40) [[PVTARR:%.*]], ptr [[PA:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[PVTARR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[PA_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[PVTARR1:%.*]] = alloca [10 x i32], align 4
|
|
// CK-64-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[PVTARR]], ptr [[PVTARR_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[PA]], ptr [[PA_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVTARR_ADDR]], align 8, !nonnull [[META18]], !align [[META19]]
|
|
// CK-64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[PVTARR1]], ptr align 4 [[TMP0]], i64 40, i1 false)
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load double, ptr [[D_ADDR]], align 8
|
|
// CK-64-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// CK-64-NEXT: store double [[ADD]], ptr [[D_ADDR]], align 8
|
|
// CK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR1]], i64 0, i64 5
|
|
// CK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CK-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// CK-64-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// CK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA_ADDR]], align 8
|
|
// CK-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 50
|
|
// CK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
|
|
// CK-64-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP4]], 1
|
|
// CK-64-NEXT: store i32 [[INC3]], ptr [[ARRAYIDX2]], align 4
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define dso_local void @_Z4foo8v(
|
|
// CK-64-SAME: ) #[[ATTR0]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[X:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
|
|
// CK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-64-NEXT: store i32 0, ptr [[X]], align 4
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4
|
|
// CK-64-NEXT: store i32 [[TMP0]], ptr [[X_CASTED]], align 4
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X_CASTED]], align 8
|
|
// CK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8
|
|
// CK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8
|
|
// CK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP4]], align 8
|
|
// CK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP5]], align 8
|
|
// CK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP6]], align 8
|
|
// CK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP7]], align 8
|
|
// CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-64-NEXT: store i32 4, ptr [[TMP10]], align 4
|
|
// CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-64-NEXT: store i32 2, ptr [[TMP11]], align 4
|
|
// CK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-64-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8
|
|
// CK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-64-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8
|
|
// CK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-64-NEXT: store ptr @.offload_sizes.13, ptr [[TMP14]], align 8
|
|
// CK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-64-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP15]], align 8
|
|
// CK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP16]], align 8
|
|
// CK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-64-NEXT: store ptr null, ptr [[TMP17]], align 8
|
|
// CK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-64-NEXT: store i64 10, ptr [[TMP18]], align 8
|
|
// CK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-64-NEXT: store i64 0, ptr [[TMP19]], align 8
|
|
// CK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4
|
|
// CK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
|
|
// CK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-64-NEXT: store i32 0, ptr [[TMP22]], align 4
|
|
// CK-64-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
|
|
// CK-64-NEXT: br i1 [[TMP24]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-64: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112(i64 [[TMP1]], ptr null) #[[ATTR2]]
|
|
// CK-64-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-64: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112(
|
|
// CK-64-SAME: i64 [[X:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
|
|
// CK-64-NEXT: store i32 [[TMP0]], ptr [[X_CASTED]], align 4
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X_CASTED]], align 8
|
|
// CK-64-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112.omp_outlined, i64 [[TMP1]])
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112.omp_outlined(
|
|
// CK-64-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[X:%.*]]) #[[ATTR1]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
|
// CK-64-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8
|
|
// CK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
|
// CK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
|
// CK-64-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
|
|
// CK-64: [[COND_TRUE]]:
|
|
// CK-64-NEXT: br label %[[COND_END:.*]]
|
|
// CK-64: [[COND_FALSE]]:
|
|
// CK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-64-NEXT: br label %[[COND_END]]
|
|
// CK-64: [[COND_END]]:
|
|
// CK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ]
|
|
// CK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
|
// CK-64-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
|
|
// CK-64: [[OMP_INNER_FOR_COND]]:
|
|
// CK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
|
// CK-64-NEXT: br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
|
|
// CK-64: [[OMP_INNER_FOR_BODY]]:
|
|
// CK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
|
|
// CK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
|
|
// CK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[X_ADDR]], align 4
|
|
// CK-64-NEXT: store i32 [[TMP11]], ptr [[X_CASTED]], align 4
|
|
// CK-64-NEXT: [[TMP12:%.*]] = load i64, ptr [[X_CASTED]], align 8
|
|
// CK-64-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]])
|
|
// CK-64-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
|
|
// CK-64: [[OMP_INNER_FOR_INC]]:
|
|
// CK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
|
|
// CK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
|
|
// CK-64-NEXT: br label %[[OMP_INNER_FOR_COND]]
|
|
// CK-64: [[OMP_INNER_FOR_END]]:
|
|
// CK-64-NEXT: br label %[[OMP_LOOP_EXIT:.*]]
|
|
// CK-64: [[OMP_LOOP_EXIT]]:
|
|
// CK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-64-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112.omp_outlined.omp_outlined(
|
|
// CK-64-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[X:%.*]]) #[[ATTR1]] {
|
|
// CK-64-NEXT: [[ENTRY:.*:]]
|
|
// CK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8
|
|
// CK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
|
// CK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
|
// CK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
|
// CK-64-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8
|
|
// CK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
|
// CK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
|
// CK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
|
// CK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
|
|
// CK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
|
// CK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
|
|
// CK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
|
|
// CK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
|
|
// CK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
|
// CK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
|
|
// CK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
|
|
// CK-64-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
|
|
// CK-64: [[COND_TRUE]]:
|
|
// CK-64-NEXT: br label %[[COND_END:.*]]
|
|
// CK-64: [[COND_FALSE]]:
|
|
// CK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CK-64-NEXT: br label %[[COND_END]]
|
|
// CK-64: [[COND_END]]:
|
|
// CK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
|
|
// CK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
|
// CK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
|
// CK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
|
|
// CK-64-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
|
|
// CK-64: [[OMP_INNER_FOR_COND]]:
|
|
// CK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
|
|
// CK-64-NEXT: br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
|
|
// CK-64: [[OMP_INNER_FOR_BODY]]:
|
|
// CK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
|
|
// CK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
|
// CK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
|
// CK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[X_ADDR]], align 4
|
|
// CK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
|
|
// CK-64-NEXT: store i32 [[ADD3]], ptr [[X_ADDR]], align 4
|
|
// CK-64-NEXT: br label %[[OMP_BODY_CONTINUE:.*]]
|
|
// CK-64: [[OMP_BODY_CONTINUE]]:
|
|
// CK-64-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
|
|
// CK-64: [[OMP_INNER_FOR_INC]]:
|
|
// CK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
|
|
// CK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
|
|
// CK-64-NEXT: br label %[[OMP_INNER_FOR_COND]]
|
|
// CK-64: [[OMP_INNER_FOR_END]]:
|
|
// CK-64-NEXT: br label %[[OMP_LOOP_EXIT:.*]]
|
|
// CK-64: [[OMP_LOOP_EXIT]]:
|
|
// CK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
|
|
// CK-64-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define dso_local void @_Z4foo1i(
|
|
// CK-32-SAME: i32 [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// CK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// CK-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// CK-32-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// CK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[TMP1]], align 4
|
|
// CK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[TMP2]], align 4
|
|
// CK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP3]], align 4
|
|
// CK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP4]], align 4
|
|
// CK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP5]], align 4
|
|
// CK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP6]], align 4
|
|
// CK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 4, ptr [[TMP9]], align 4
|
|
// CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-32-NEXT: store i32 2, ptr [[TMP10]], align 4
|
|
// CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4
|
|
// CK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4
|
|
// CK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-32-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 4
|
|
// CK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-32-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 4
|
|
// CK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP15]], align 4
|
|
// CK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP16]], align 4
|
|
// CK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP17]], align 8
|
|
// CK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP18]], align 8
|
|
// CK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP19]], align 4
|
|
// CK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4
|
|
// CK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-32-NEXT: store i32 0, ptr [[TMP21]], align 4
|
|
// CK-32-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1i_l23.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
|
|
// CK-32-NEXT: br i1 [[TMP23]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-32: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1i_l23(ptr [[D]], ptr null) #[[ATTR2:[0-9]+]]
|
|
// CK-32-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-32: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1i_l23(
|
|
// CK-32-SAME: ptr nonnull align 4 dereferenceable(8) [[D:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[D1:%.*]] = alloca double, align 8
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META19:![0-9]+]], !align [[META20:![0-9]+]]
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load double, ptr [[D1]], align 8
|
|
// CK-32-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// CK-32-NEXT: store double [[ADD]], ptr [[D1]], align 8
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define dso_local void @_Z4foo2v(
|
|
// CK-32-SAME: ) #[[ATTR0]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-32-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[TMP0]], align 4
|
|
// CK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[TMP1]], align 4
|
|
// CK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP2]], align 4
|
|
// CK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP3]], align 4
|
|
// CK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP4]], align 4
|
|
// CK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP5]], align 4
|
|
// CK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 4, ptr [[TMP8]], align 4
|
|
// CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-32-NEXT: store i32 2, ptr [[TMP9]], align 4
|
|
// CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
|
|
// CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4
|
|
// CK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-32-NEXT: store ptr @.offload_sizes.1, ptr [[TMP12]], align 4
|
|
// CK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-32-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP13]], align 4
|
|
// CK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP14]], align 4
|
|
// CK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP15]], align 4
|
|
// CK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP16]], align 8
|
|
// CK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP17]], align 8
|
|
// CK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4
|
|
// CK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4
|
|
// CK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-32-NEXT: store i32 0, ptr [[TMP20]], align 4
|
|
// CK-32-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l32.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
|
|
// CK-32-NEXT: br i1 [[TMP22]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-32: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l32(ptr [[PVTARR]], ptr null) #[[ATTR2]]
|
|
// CK-32-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-32: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l32(
|
|
// CK-32-SAME: ptr nonnull align 4 dereferenceable(40) [[PVTARR:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[PVTARR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[PVTARR1:%.*]] = alloca [10 x i32], align 4
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[PVTARR_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVTARR_ADDR]], align 4, !nonnull [[META19]], !align [[META20]]
|
|
// CK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR1]], i32 0, i32 5
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CK-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
|
|
// CK-32-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define dso_local void @_Z4foo3v(
|
|
// CK-32-SAME: ) #[[ATTR0]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[PA:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PA]], align 4
|
|
// CK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4
|
|
// CK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4
|
|
// CK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP3]], align 4
|
|
// CK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP4]], align 4
|
|
// CK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP5]], align 4
|
|
// CK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP6]], align 4
|
|
// CK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 4, ptr [[TMP9]], align 4
|
|
// CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-32-NEXT: store i32 2, ptr [[TMP10]], align 4
|
|
// CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4
|
|
// CK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4
|
|
// CK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-32-NEXT: store ptr @.offload_sizes.3, ptr [[TMP13]], align 4
|
|
// CK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-32-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP14]], align 4
|
|
// CK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP15]], align 4
|
|
// CK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP16]], align 4
|
|
// CK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP17]], align 8
|
|
// CK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP18]], align 8
|
|
// CK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP19]], align 4
|
|
// CK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4
|
|
// CK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-32-NEXT: store i32 0, ptr [[TMP21]], align 4
|
|
// CK-32-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l41.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
|
|
// CK-32-NEXT: br i1 [[TMP23]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-32: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l41(ptr [[TMP0]], ptr null) #[[ATTR2]]
|
|
// CK-32-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-32: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l41(
|
|
// CK-32-SAME: ptr [[PA:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[PA_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[PA1:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: store ptr [[PA]], ptr [[PA_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PA1]], align 4
|
|
// CK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 50
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CK-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
|
|
// CK-32-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define dso_local void @_Z4foo4v(
|
|
// CK-32-SAME: ) #[[ATTR0]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[P:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[P_CASTED:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
|
|
// CK-32-NEXT: store i32 [[TMP0]], ptr [[P_CASTED]], align 4
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[P_CASTED]], align 4
|
|
// CK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
|
|
// CK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4
|
|
// CK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP4]], align 4
|
|
// CK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP5]], align 4
|
|
// CK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP6]], align 4
|
|
// CK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP7]], align 4
|
|
// CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 4, ptr [[TMP10]], align 4
|
|
// CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-32-NEXT: store i32 2, ptr [[TMP11]], align 4
|
|
// CK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4
|
|
// CK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4
|
|
// CK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-32-NEXT: store ptr @.offload_sizes.5, ptr [[TMP14]], align 4
|
|
// CK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-32-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP15]], align 4
|
|
// CK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP16]], align 4
|
|
// CK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP17]], align 4
|
|
// CK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP18]], align 8
|
|
// CK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP19]], align 8
|
|
// CK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
|
|
// CK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
|
|
// CK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-32-NEXT: store i32 0, ptr [[TMP22]], align 4
|
|
// CK-32-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l52.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-32-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
|
|
// CK-32-NEXT: br i1 [[TMP24]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-32: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l52(i32 [[TMP1]], ptr null) #[[ATTR2]]
|
|
// CK-32-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-32: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l52(
|
|
// CK-32-SAME: i32 [[P:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[P_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: store i32 [[P]], ptr [[P_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[P_ADDR]], align 4
|
|
// CK-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
|
|
// CK-32-NEXT: store i32 [[INC]], ptr [[P_ADDR]], align 4
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define dso_local void @_Z4foo5i(
|
|
// CK-32-SAME: i32 [[A:%.*]]) #[[ATTR0]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// CK-32-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// CK-32-NEXT: [[PA:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
|
|
// CK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// CK-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// CK-32-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PA]], align 4
|
|
// CK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[TMP2]], align 4
|
|
// CK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[TMP3]], align 4
|
|
// CK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP4]], align 4
|
|
// CK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[TMP5]], align 4
|
|
// CK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[TMP6]], align 4
|
|
// CK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP7]], align 4
|
|
// CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4
|
|
// CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4
|
|
// CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP10]], align 4
|
|
// CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP11]], align 4
|
|
// CK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP12]], align 4
|
|
// CK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP13]], align 4
|
|
// CK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 4, ptr [[TMP16]], align 4
|
|
// CK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-32-NEXT: store i32 4, ptr [[TMP17]], align 4
|
|
// CK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 4
|
|
// CK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 4
|
|
// CK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-32-NEXT: store ptr @.offload_sizes.7, ptr [[TMP20]], align 4
|
|
// CK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-32-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP21]], align 4
|
|
// CK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP22]], align 4
|
|
// CK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP23]], align 4
|
|
// CK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP24]], align 8
|
|
// CK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP25]], align 8
|
|
// CK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP26]], align 4
|
|
// CK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4
|
|
// CK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-32-NEXT: store i32 0, ptr [[TMP28]], align 4
|
|
// CK-32-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo5i_l66.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-32-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
|
|
// CK-32-NEXT: br i1 [[TMP30]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-32: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo5i_l66(ptr [[D]], ptr [[PVTARR]], ptr [[TMP1]], ptr null) #[[ATTR2]]
|
|
// CK-32-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-32: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo5i_l66(
|
|
// CK-32-SAME: ptr nonnull align 4 dereferenceable(8) [[D:%.*]], ptr nonnull align 4 dereferenceable(40) [[PVTARR:%.*]], ptr [[PA:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[PVTARR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[PA_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[D1:%.*]] = alloca double, align 8
|
|
// CK-32-NEXT: [[PVTARR2:%.*]] = alloca [10 x i32], align 4
|
|
// CK-32-NEXT: [[PA3:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[PVTARR_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[PA]], ptr [[PA_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META19]], !align [[META20]]
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PVTARR_ADDR]], align 4, !nonnull [[META19]], !align [[META20]]
|
|
// CK-32-NEXT: [[TMP2:%.*]] = load double, ptr [[D1]], align 8
|
|
// CK-32-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], 1.000000e+00
|
|
// CK-32-NEXT: store double [[ADD]], ptr [[D1]], align 8
|
|
// CK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR2]], i32 0, i32 5
|
|
// CK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CK-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
|
|
// CK-32-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// CK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PA3]], align 4
|
|
// CK-32-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 50
|
|
// CK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
|
|
// CK-32-NEXT: [[INC5:%.*]] = add nsw i32 [[TMP5]], 1
|
|
// CK-32-NEXT: store i32 [[INC5]], ptr [[ARRAYIDX4]], align 4
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define dso_local void @_Z4foo6i(
|
|
// CK-32-SAME: i32 [[A:%.*]]) #[[ATTR0]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// CK-32-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// CK-32-NEXT: [[PA:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
|
|
// CK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// CK-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// CK-32-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PA]], align 4
|
|
// CK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[TMP2]], align 4
|
|
// CK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[TMP3]], align 4
|
|
// CK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP4]], align 4
|
|
// CK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[TMP5]], align 4
|
|
// CK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[TMP6]], align 4
|
|
// CK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP7]], align 4
|
|
// CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4
|
|
// CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4
|
|
// CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP10]], align 4
|
|
// CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP11]], align 4
|
|
// CK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP12]], align 4
|
|
// CK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP13]], align 4
|
|
// CK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 4, ptr [[TMP16]], align 4
|
|
// CK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-32-NEXT: store i32 4, ptr [[TMP17]], align 4
|
|
// CK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 4
|
|
// CK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 4
|
|
// CK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-32-NEXT: store ptr @.offload_sizes.9, ptr [[TMP20]], align 4
|
|
// CK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-32-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP21]], align 4
|
|
// CK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP22]], align 4
|
|
// CK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP23]], align 4
|
|
// CK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP24]], align 8
|
|
// CK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP25]], align 8
|
|
// CK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP26]], align 4
|
|
// CK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4
|
|
// CK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-32-NEXT: store i32 0, ptr [[TMP28]], align 4
|
|
// CK-32-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo6i_l82.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-32-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
|
|
// CK-32-NEXT: br i1 [[TMP30]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-32: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo6i_l82(ptr [[D]], ptr [[PVTARR]], ptr [[TMP1]], ptr null) #[[ATTR2]]
|
|
// CK-32-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-32: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo6i_l82(
|
|
// CK-32-SAME: ptr nonnull align 4 dereferenceable(8) [[D:%.*]], ptr nonnull align 4 dereferenceable(40) [[PVTARR:%.*]], ptr [[PA:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[PVTARR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[PA_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[D1:%.*]] = alloca double, align 8
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[PVTARR_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[PA]], ptr [[PA_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META19]], !align [[META20]]
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PVTARR_ADDR]], align 4, !nonnull [[META19]], !align [[META20]]
|
|
// CK-32-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP0]], align 8
|
|
// CK-32-NEXT: store double [[TMP2]], ptr [[D1]], align 8
|
|
// CK-32-NEXT: [[TMP3:%.*]] = load double, ptr [[D1]], align 8
|
|
// CK-32-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 1.000000e+00
|
|
// CK-32-NEXT: store double [[ADD]], ptr [[D1]], align 8
|
|
// CK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP1]], i32 0, i32 5
|
|
// CK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CK-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
|
|
// CK-32-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// CK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PA_ADDR]], align 4
|
|
// CK-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 50
|
|
// CK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
|
|
// CK-32-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP6]], 1
|
|
// CK-32-NEXT: store i32 [[INC3]], ptr [[ARRAYIDX2]], align 4
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define dso_local void @_Z4foo7i(
|
|
// CK-32-SAME: i32 [[A:%.*]]) #[[ATTR0]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// CK-32-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// CK-32-NEXT: [[PA:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
|
|
// CK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// CK-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// CK-32-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PA]], align 4
|
|
// CK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[TMP2]], align 4
|
|
// CK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[TMP3]], align 4
|
|
// CK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP4]], align 4
|
|
// CK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[TMP5]], align 4
|
|
// CK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[TMP6]], align 4
|
|
// CK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP7]], align 4
|
|
// CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4
|
|
// CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4
|
|
// CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP10]], align 4
|
|
// CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP11]], align 4
|
|
// CK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP12]], align 4
|
|
// CK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP13]], align 4
|
|
// CK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 4, ptr [[TMP16]], align 4
|
|
// CK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-32-NEXT: store i32 4, ptr [[TMP17]], align 4
|
|
// CK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 4
|
|
// CK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 4
|
|
// CK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-32-NEXT: store ptr @.offload_sizes.11, ptr [[TMP20]], align 4
|
|
// CK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-32-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP21]], align 4
|
|
// CK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP22]], align 4
|
|
// CK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP23]], align 4
|
|
// CK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP24]], align 8
|
|
// CK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP25]], align 8
|
|
// CK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP26]], align 4
|
|
// CK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4
|
|
// CK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-32-NEXT: store i32 0, ptr [[TMP28]], align 4
|
|
// CK-32-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo7i_l98.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-32-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
|
|
// CK-32-NEXT: br i1 [[TMP30]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-32: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo7i_l98(ptr [[D]], ptr [[PVTARR]], ptr [[TMP1]], ptr null) #[[ATTR2]]
|
|
// CK-32-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-32: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo7i_l98(
|
|
// CK-32-SAME: ptr nonnull align 4 dereferenceable(8) [[D:%.*]], ptr nonnull align 4 dereferenceable(40) [[PVTARR:%.*]], ptr [[PA:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[PVTARR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[PA_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[D1:%.*]] = alloca double, align 8
|
|
// CK-32-NEXT: [[PVTARR2:%.*]] = alloca [10 x i32], align 4
|
|
// CK-32-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[PVTARR]], ptr [[PVTARR_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[PA]], ptr [[PA_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META19]], !align [[META20]]
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PVTARR_ADDR]], align 4, !nonnull [[META19]], !align [[META20]]
|
|
// CK-32-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP0]], align 8
|
|
// CK-32-NEXT: store double [[TMP2]], ptr [[D1]], align 8
|
|
// CK-32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[PVTARR2]], ptr align 4 [[TMP1]], i32 40, i1 false)
|
|
// CK-32-NEXT: [[TMP3:%.*]] = load double, ptr [[D1]], align 8
|
|
// CK-32-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 1.000000e+00
|
|
// CK-32-NEXT: store double [[ADD]], ptr [[D1]], align 8
|
|
// CK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR2]], i32 0, i32 5
|
|
// CK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CK-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
|
|
// CK-32-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// CK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PA_ADDR]], align 4
|
|
// CK-32-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 50
|
|
// CK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
|
|
// CK-32-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP6]], 1
|
|
// CK-32-NEXT: store i32 [[INC4]], ptr [[ARRAYIDX3]], align 4
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define dso_local void @_Z4foo8v(
|
|
// CK-32-SAME: ) #[[ATTR0]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[X:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
|
|
// CK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CK-32-NEXT: store i32 0, ptr [[X]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4
|
|
// CK-32-NEXT: store i32 [[TMP0]], ptr [[X_CASTED]], align 4
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X_CASTED]], align 4
|
|
// CK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
|
|
// CK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4
|
|
// CK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP4]], align 4
|
|
// CK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP5]], align 4
|
|
// CK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP6]], align 4
|
|
// CK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP7]], align 4
|
|
// CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CK-32-NEXT: store i32 4, ptr [[TMP10]], align 4
|
|
// CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CK-32-NEXT: store i32 2, ptr [[TMP11]], align 4
|
|
// CK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CK-32-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4
|
|
// CK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CK-32-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4
|
|
// CK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CK-32-NEXT: store ptr @.offload_sizes.13, ptr [[TMP14]], align 4
|
|
// CK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CK-32-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP15]], align 4
|
|
// CK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP16]], align 4
|
|
// CK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CK-32-NEXT: store ptr null, ptr [[TMP17]], align 4
|
|
// CK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CK-32-NEXT: store i64 10, ptr [[TMP18]], align 8
|
|
// CK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CK-32-NEXT: store i64 0, ptr [[TMP19]], align 8
|
|
// CK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CK-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4
|
|
// CK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CK-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
|
|
// CK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CK-32-NEXT: store i32 0, ptr [[TMP22]], align 4
|
|
// CK-32-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112.region_id, ptr [[KERNEL_ARGS]])
|
|
// CK-32-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
|
|
// CK-32-NEXT: br i1 [[TMP24]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
|
|
// CK-32: [[OMP_OFFLOAD_FAILED]]:
|
|
// CK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112(i32 [[TMP1]], ptr null) #[[ATTR2]]
|
|
// CK-32-NEXT: br label %[[OMP_OFFLOAD_CONT]]
|
|
// CK-32: [[OMP_OFFLOAD_CONT]]:
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112(
|
|
// CK-32-SAME: i32 [[X:%.*]], ptr noalias [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
|
|
// CK-32-NEXT: store i32 [[TMP0]], ptr [[X_CASTED]], align 4
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X_CASTED]], align 4
|
|
// CK-32-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112.omp_outlined, i32 [[TMP1]])
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112.omp_outlined(
|
|
// CK-32-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[X:%.*]]) #[[ATTR1]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
|
// CK-32-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
|
|
// CK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
|
// CK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
|
// CK-32-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
|
|
// CK-32: [[COND_TRUE]]:
|
|
// CK-32-NEXT: br label %[[COND_END:.*]]
|
|
// CK-32: [[COND_FALSE]]:
|
|
// CK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-32-NEXT: br label %[[COND_END]]
|
|
// CK-32: [[COND_END]]:
|
|
// CK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ]
|
|
// CK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
|
// CK-32-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
|
|
// CK-32: [[OMP_INNER_FOR_COND]]:
|
|
// CK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
|
// CK-32-NEXT: br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
|
|
// CK-32: [[OMP_INNER_FOR_BODY]]:
|
|
// CK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[X_ADDR]], align 4
|
|
// CK-32-NEXT: store i32 [[TMP9]], ptr [[X_CASTED]], align 4
|
|
// CK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[X_CASTED]], align 4
|
|
// CK-32-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]], i32 [[TMP10]])
|
|
// CK-32-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
|
|
// CK-32: [[OMP_INNER_FOR_INC]]:
|
|
// CK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
|
|
// CK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
|
|
// CK-32-NEXT: br label %[[OMP_INNER_FOR_COND]]
|
|
// CK-32: [[OMP_INNER_FOR_END]]:
|
|
// CK-32-NEXT: br label %[[OMP_LOOP_EXIT:.*]]
|
|
// CK-32: [[OMP_LOOP_EXIT]]:
|
|
// CK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// CK-32-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo8v_l112.omp_outlined.omp_outlined(
|
|
// CK-32-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[X:%.*]]) #[[ATTR1]] {
|
|
// CK-32-NEXT: [[ENTRY:.*:]]
|
|
// CK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
|
// CK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
|
|
// CK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
|
|
// CK-32-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
|
|
// CK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
|
// CK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
|
// CK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
|
|
// CK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
|
|
// CK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
|
|
// CK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
|
// CK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
|
|
// CK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
|
|
// CK-32-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
|
|
// CK-32: [[COND_TRUE]]:
|
|
// CK-32-NEXT: br label %[[COND_END:.*]]
|
|
// CK-32: [[COND_FALSE]]:
|
|
// CK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CK-32-NEXT: br label %[[COND_END]]
|
|
// CK-32: [[COND_END]]:
|
|
// CK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
|
|
// CK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
|
// CK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
|
// CK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
|
|
// CK-32-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
|
|
// CK-32: [[OMP_INNER_FOR_COND]]:
|
|
// CK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
|
|
// CK-32-NEXT: br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
|
|
// CK-32: [[OMP_INNER_FOR_BODY]]:
|
|
// CK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
|
|
// CK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
|
// CK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
|
// CK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[X_ADDR]], align 4
|
|
// CK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
|
|
// CK-32-NEXT: store i32 [[ADD2]], ptr [[X_ADDR]], align 4
|
|
// CK-32-NEXT: br label %[[OMP_BODY_CONTINUE:.*]]
|
|
// CK-32: [[OMP_BODY_CONTINUE]]:
|
|
// CK-32-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
|
|
// CK-32: [[OMP_INNER_FOR_INC]]:
|
|
// CK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1
|
|
// CK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
|
|
// CK-32-NEXT: br label %[[OMP_INNER_FOR_COND]]
|
|
// CK-32: [[OMP_INNER_FOR_END]]:
|
|
// CK-32-NEXT: br label %[[OMP_LOOP_EXIT:.*]]
|
|
// CK-32: [[OMP_LOOP_EXIT]]:
|
|
// CK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
|
|
// CK-32-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-64-LABEL: define dso_local void @_Z4foo1i(
|
|
// SIMD-ONLY-64-SAME: i32 signext [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// SIMD-ONLY-64-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-64-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-64-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-64-NEXT: [[D1:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-64-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// SIMD-ONLY-64-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[TMP1:%.*]] = load double, ptr [[D1]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// SIMD-ONLY-64-NEXT: store double [[ADD]], ptr [[D1]], align 8
|
|
// SIMD-ONLY-64-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-64-LABEL: define dso_local void @_Z4foo2v(
|
|
// SIMD-ONLY-64-SAME: ) #[[ATTR0]] {
|
|
// SIMD-ONLY-64-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-64-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-64-NEXT: [[PVTARR1:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR1]], i64 0, i64 5
|
|
// SIMD-ONLY-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
|
|
// SIMD-ONLY-64-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-64-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-64-LABEL: define dso_local void @_Z4foo3v(
|
|
// SIMD-ONLY-64-SAME: ) #[[ATTR0]] {
|
|
// SIMD-ONLY-64-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-64-NEXT: [[PA:%.*]] = alloca ptr, align 8
|
|
// SIMD-ONLY-64-NEXT: [[PA1:%.*]] = alloca ptr, align 8
|
|
// SIMD-ONLY-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PA1]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 50
|
|
// SIMD-ONLY-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
|
|
// SIMD-ONLY-64-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-64-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-64-LABEL: define dso_local void @_Z4foo4v(
|
|
// SIMD-ONLY-64-SAME: ) #[[ATTR0]] {
|
|
// SIMD-ONLY-64-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-64-NEXT: [[P:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
|
|
// SIMD-ONLY-64-NEXT: store i32 [[INC]], ptr [[P]], align 4
|
|
// SIMD-ONLY-64-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-64-LABEL: define dso_local void @_Z4foo5i(
|
|
// SIMD-ONLY-64-SAME: i32 signext [[A:%.*]]) #[[ATTR0]] {
|
|
// SIMD-ONLY-64-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-64-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-64-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-64-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-64-NEXT: [[PA:%.*]] = alloca ptr, align 8
|
|
// SIMD-ONLY-64-NEXT: [[D1:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-64-NEXT: [[PVTARR2:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-64-NEXT: [[PA3:%.*]] = alloca ptr, align 8
|
|
// SIMD-ONLY-64-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// SIMD-ONLY-64-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[TMP1:%.*]] = load double, ptr [[D1]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// SIMD-ONLY-64-NEXT: store double [[ADD]], ptr [[D1]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR2]], i64 0, i64 5
|
|
// SIMD-ONLY-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// SIMD-ONLY-64-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA3]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 50
|
|
// SIMD-ONLY-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[INC5:%.*]] = add nsw i32 [[TMP4]], 1
|
|
// SIMD-ONLY-64-NEXT: store i32 [[INC5]], ptr [[ARRAYIDX4]], align 4
|
|
// SIMD-ONLY-64-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-64-LABEL: define dso_local void @_Z4foo6i(
|
|
// SIMD-ONLY-64-SAME: i32 signext [[A:%.*]]) #[[ATTR0]] {
|
|
// SIMD-ONLY-64-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-64-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-64-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-64-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-64-NEXT: [[PA:%.*]] = alloca ptr, align 8
|
|
// SIMD-ONLY-64-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// SIMD-ONLY-64-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[TMP1:%.*]] = load double, ptr [[D]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// SIMD-ONLY-64-NEXT: store double [[ADD]], ptr [[D]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR]], i64 0, i64 5
|
|
// SIMD-ONLY-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// SIMD-ONLY-64-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 50
|
|
// SIMD-ONLY-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP4]], 1
|
|
// SIMD-ONLY-64-NEXT: store i32 [[INC2]], ptr [[ARRAYIDX1]], align 4
|
|
// SIMD-ONLY-64-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-64-LABEL: define dso_local void @_Z4foo7i(
|
|
// SIMD-ONLY-64-SAME: i32 signext [[A:%.*]]) #[[ATTR0]] {
|
|
// SIMD-ONLY-64-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-64-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-64-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-64-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-64-NEXT: [[PA:%.*]] = alloca ptr, align 8
|
|
// SIMD-ONLY-64-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// SIMD-ONLY-64-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[TMP1:%.*]] = load double, ptr [[D]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// SIMD-ONLY-64-NEXT: store double [[ADD]], ptr [[D]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR]], i64 0, i64 5
|
|
// SIMD-ONLY-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// SIMD-ONLY-64-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA]], align 8
|
|
// SIMD-ONLY-64-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 50
|
|
// SIMD-ONLY-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP4]], 1
|
|
// SIMD-ONLY-64-NEXT: store i32 [[INC2]], ptr [[ARRAYIDX1]], align 4
|
|
// SIMD-ONLY-64-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-64-LABEL: define dso_local void @_Z4foo8v(
|
|
// SIMD-ONLY-64-SAME: ) #[[ATTR0]] {
|
|
// SIMD-ONLY-64-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-64-NEXT: [[X:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-64-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-64-NEXT: store i32 0, ptr [[X]], align 4
|
|
// SIMD-ONLY-64-NEXT: store i32 0, ptr [[I]], align 4
|
|
// SIMD-ONLY-64-NEXT: br label %[[FOR_COND:.*]]
|
|
// SIMD-ONLY-64: [[FOR_COND]]:
|
|
// SIMD-ONLY-64-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
|
|
// SIMD-ONLY-64-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
|
|
// SIMD-ONLY-64: [[FOR_BODY]]:
|
|
// SIMD-ONLY-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
|
|
// SIMD-ONLY-64-NEXT: store i32 [[ADD]], ptr [[X]], align 4
|
|
// SIMD-ONLY-64-NEXT: br label %[[FOR_INC:.*]]
|
|
// SIMD-ONLY-64: [[FOR_INC]]:
|
|
// SIMD-ONLY-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
|
|
// SIMD-ONLY-64-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// SIMD-ONLY-64-NEXT: store i32 [[INC]], ptr [[I]], align 4
|
|
// SIMD-ONLY-64-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP1:![0-9]+]]
|
|
// SIMD-ONLY-64: [[FOR_END]]:
|
|
// SIMD-ONLY-64-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-32-LABEL: define dso_local void @_Z4foo1i(
|
|
// SIMD-ONLY-32-SAME: i32 [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// SIMD-ONLY-32-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-32-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-32-NEXT: [[D1:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// SIMD-ONLY-32-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// SIMD-ONLY-32-NEXT: [[TMP1:%.*]] = load double, ptr [[D1]], align 8
|
|
// SIMD-ONLY-32-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// SIMD-ONLY-32-NEXT: store double [[ADD]], ptr [[D1]], align 8
|
|
// SIMD-ONLY-32-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-32-LABEL: define dso_local void @_Z4foo2v(
|
|
// SIMD-ONLY-32-SAME: ) #[[ATTR0]] {
|
|
// SIMD-ONLY-32-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-32-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-32-NEXT: [[PVTARR1:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR1]], i32 0, i32 5
|
|
// SIMD-ONLY-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
|
|
// SIMD-ONLY-32-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-32-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-32-LABEL: define dso_local void @_Z4foo3v(
|
|
// SIMD-ONLY-32-SAME: ) #[[ATTR0]] {
|
|
// SIMD-ONLY-32-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-32-NEXT: [[PA:%.*]] = alloca ptr, align 4
|
|
// SIMD-ONLY-32-NEXT: [[PA1:%.*]] = alloca ptr, align 4
|
|
// SIMD-ONLY-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PA1]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 50
|
|
// SIMD-ONLY-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
|
|
// SIMD-ONLY-32-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-32-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-32-LABEL: define dso_local void @_Z4foo4v(
|
|
// SIMD-ONLY-32-SAME: ) #[[ATTR0]] {
|
|
// SIMD-ONLY-32-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-32-NEXT: [[P:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
|
|
// SIMD-ONLY-32-NEXT: store i32 [[INC]], ptr [[P]], align 4
|
|
// SIMD-ONLY-32-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-32-LABEL: define dso_local void @_Z4foo5i(
|
|
// SIMD-ONLY-32-SAME: i32 [[A:%.*]]) #[[ATTR0]] {
|
|
// SIMD-ONLY-32-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-32-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-32-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-32-NEXT: [[PA:%.*]] = alloca ptr, align 4
|
|
// SIMD-ONLY-32-NEXT: [[D1:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-32-NEXT: [[PVTARR2:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-32-NEXT: [[PA3:%.*]] = alloca ptr, align 4
|
|
// SIMD-ONLY-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// SIMD-ONLY-32-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// SIMD-ONLY-32-NEXT: [[TMP1:%.*]] = load double, ptr [[D1]], align 8
|
|
// SIMD-ONLY-32-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// SIMD-ONLY-32-NEXT: store double [[ADD]], ptr [[D1]], align 8
|
|
// SIMD-ONLY-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR2]], i32 0, i32 5
|
|
// SIMD-ONLY-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// SIMD-ONLY-32-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA3]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 50
|
|
// SIMD-ONLY-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[INC5:%.*]] = add nsw i32 [[TMP4]], 1
|
|
// SIMD-ONLY-32-NEXT: store i32 [[INC5]], ptr [[ARRAYIDX4]], align 4
|
|
// SIMD-ONLY-32-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-32-LABEL: define dso_local void @_Z4foo6i(
|
|
// SIMD-ONLY-32-SAME: i32 [[A:%.*]]) #[[ATTR0]] {
|
|
// SIMD-ONLY-32-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-32-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-32-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-32-NEXT: [[PA:%.*]] = alloca ptr, align 4
|
|
// SIMD-ONLY-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// SIMD-ONLY-32-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// SIMD-ONLY-32-NEXT: [[TMP1:%.*]] = load double, ptr [[D]], align 8
|
|
// SIMD-ONLY-32-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// SIMD-ONLY-32-NEXT: store double [[ADD]], ptr [[D]], align 8
|
|
// SIMD-ONLY-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR]], i32 0, i32 5
|
|
// SIMD-ONLY-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// SIMD-ONLY-32-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 50
|
|
// SIMD-ONLY-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP4]], 1
|
|
// SIMD-ONLY-32-NEXT: store i32 [[INC2]], ptr [[ARRAYIDX1]], align 4
|
|
// SIMD-ONLY-32-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-32-LABEL: define dso_local void @_Z4foo7i(
|
|
// SIMD-ONLY-32-SAME: i32 [[A:%.*]]) #[[ATTR0]] {
|
|
// SIMD-ONLY-32-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-32-NEXT: [[D:%.*]] = alloca double, align 8
|
|
// SIMD-ONLY-32-NEXT: [[PVTARR:%.*]] = alloca [10 x i32], align 4
|
|
// SIMD-ONLY-32-NEXT: [[PA:%.*]] = alloca ptr, align 4
|
|
// SIMD-ONLY-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
|
|
// SIMD-ONLY-32-NEXT: store double [[CONV]], ptr [[D]], align 8
|
|
// SIMD-ONLY-32-NEXT: [[TMP1:%.*]] = load double, ptr [[D]], align 8
|
|
// SIMD-ONLY-32-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 1.000000e+00
|
|
// SIMD-ONLY-32-NEXT: store double [[ADD]], ptr [[D]], align 8
|
|
// SIMD-ONLY-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[PVTARR]], i32 0, i32 5
|
|
// SIMD-ONLY-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// SIMD-ONLY-32-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PA]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 50
|
|
// SIMD-ONLY-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP4]], 1
|
|
// SIMD-ONLY-32-NEXT: store i32 [[INC2]], ptr [[ARRAYIDX1]], align 4
|
|
// SIMD-ONLY-32-NEXT: ret void
|
|
//
|
|
//
|
|
// SIMD-ONLY-32-LABEL: define dso_local void @_Z4foo8v(
|
|
// SIMD-ONLY-32-SAME: ) #[[ATTR0]] {
|
|
// SIMD-ONLY-32-NEXT: [[ENTRY:.*:]]
|
|
// SIMD-ONLY-32-NEXT: [[X:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-32-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// SIMD-ONLY-32-NEXT: store i32 0, ptr [[X]], align 4
|
|
// SIMD-ONLY-32-NEXT: store i32 0, ptr [[I]], align 4
|
|
// SIMD-ONLY-32-NEXT: br label %[[FOR_COND:.*]]
|
|
// SIMD-ONLY-32: [[FOR_COND]]:
|
|
// SIMD-ONLY-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
|
|
// SIMD-ONLY-32-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
|
|
// SIMD-ONLY-32: [[FOR_BODY]]:
|
|
// SIMD-ONLY-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
|
|
// SIMD-ONLY-32-NEXT: store i32 [[ADD]], ptr [[X]], align 4
|
|
// SIMD-ONLY-32-NEXT: br label %[[FOR_INC:.*]]
|
|
// SIMD-ONLY-32: [[FOR_INC]]:
|
|
// SIMD-ONLY-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
|
|
// SIMD-ONLY-32-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
|
|
// SIMD-ONLY-32-NEXT: store i32 [[INC]], ptr [[I]], align 4
|
|
// SIMD-ONLY-32-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
|
|
// SIMD-ONLY-32: [[FOR_END]]:
|
|
// SIMD-ONLY-32-NEXT: ret void
|
|
//
|
|
//.
|
|
// CK-64: [[META18]] = !{}
|
|
// CK-64: [[META19]] = !{i64 4}
|
|
//.
|
|
// CK-32: [[META19]] = !{}
|
|
// CK-32: [[META20]] = !{i64 4}
|
|
//.
|
|
// SIMD-ONLY-64: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]]}
|
|
// SIMD-ONLY-64: [[META2]] = !{!"llvm.loop.mustprogress"}
|
|
//.
|
|
// SIMD-ONLY-32: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
|
|
// SIMD-ONLY-32: [[META3]] = !{!"llvm.loop.mustprogress"}
|
|
//.
|