We used to have two separate implementations to derive the number of threads used in a target region. This lead us to sometimes miss out on user provided thread bounds (num_threads, or thread_limit) when we looked for "constant default values". If we might miss out on the presence of those bounds, we cannot set the thread_limit statically since the runtime will try to honor user input rather than cap it at the "preferred default". This patch replaces the secondary implementation with the primary in a mode that will not emit code but just look for the presence, and potentially upper bounds, of thread limiting clauses. The runtime test would not pass without this rewrite as we missed some clauses, set the static limit on the device to the preferred value, but then violated that value at runtime. Fixes: https://github.com/llvm/llvm-project/issues/64845 Differential Revision: https://reviews.llvm.org/D158381
571 lines
38 KiB
C++
571 lines
38 KiB
C++
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
|
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu \
|
|
// RUN: -x c++ -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - \
|
|
// RUN: | FileCheck %s
|
|
|
|
// expected-no-diagnostics
|
|
|
|
|
|
class A {
|
|
protected:
|
|
int X;
|
|
int Y;
|
|
|
|
public:
|
|
A (int x, int y) : X { x }, Y { y } { };
|
|
};
|
|
|
|
class B : public A {
|
|
using A::X;
|
|
using A::Y;
|
|
public:
|
|
int res;
|
|
B (int x, int y) : A(x,y), res{0} {}
|
|
void run (void) {
|
|
#pragma omp target
|
|
res = X + Y;
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct descriptor
|
|
{
|
|
T *A;
|
|
T *C;
|
|
T *C_ref;
|
|
unsigned M;
|
|
unsigned K;
|
|
unsigned N;
|
|
};
|
|
|
|
class BASE
|
|
{
|
|
};
|
|
|
|
//template<typename T>
|
|
class C : public BASE
|
|
{
|
|
public:
|
|
void bar (descriptor<float> &d)
|
|
{
|
|
auto Asize = d.M * d.K;
|
|
auto Csize = d.M * d.N;
|
|
#pragma omp target data map(to:d.A[0:Asize]) map(from:d.C[0:Csize])
|
|
{
|
|
#pragma omp target teams firstprivate(Csize)
|
|
for (int i = 0; i < Csize; ++i)
|
|
d.C[i] = 1;
|
|
}
|
|
#pragma omp target map(from:d.C[0:Csize])
|
|
for (int i = 0; i < Csize; ++i)
|
|
d.C[i] = 1;
|
|
}
|
|
};
|
|
|
|
void foo() {
|
|
B b(2, 3);
|
|
b.run();
|
|
C c;
|
|
descriptor<float> d;
|
|
c.bar(d);
|
|
}
|
|
|
|
// CHECK-LABEL: define {{[^@]+}}@_Z3foov
|
|
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[B:%.*]] = alloca [[CLASS_B:%.*]], align 4
|
|
// CHECK-NEXT: [[C:%.*]] = alloca [[CLASS_C:%.*]], align 1
|
|
// CHECK-NEXT: [[D:%.*]] = alloca [[STRUCT_DESCRIPTOR:%.*]], align 8
|
|
// CHECK-NEXT: call void @_ZN1BC1Eii(ptr noundef nonnull align 4 dereferenceable(12) [[B]], i32 noundef 2, i32 noundef 3)
|
|
// CHECK-NEXT: call void @_ZN1B3runEv(ptr noundef nonnull align 4 dereferenceable(12) [[B]])
|
|
// CHECK-NEXT: call void @_ZN1C3barER10descriptorIfE(ptr noundef nonnull align 1 dereferenceable(1) [[C]], ptr noundef nonnull align 8 dereferenceable(40) [[D]])
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@_ZN1BC1Eii
|
|
// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(12) [[THIS:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
|
|
// CHECK-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4
|
|
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4
|
|
// CHECK-NEXT: call void @_ZN1BC2Eii(ptr noundef nonnull align 4 dereferenceable(12) [[THIS1]], i32 noundef [[TMP0]], i32 noundef [[TMP1]])
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@_ZN1B3runEv
|
|
// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(12) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
|
|
// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: [[RES:%.*]] = getelementptr inbounds [[CLASS_B:%.*]], ptr [[THIS1]], i32 0, i32 1
|
|
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[CLASS_A:%.*]], ptr [[THIS1]], i32 0, i32 0
|
|
// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[CLASS_A]], ptr [[THIS1]], i32 0, i32 1
|
|
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8
|
|
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8
|
|
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP3]], align 8
|
|
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CHECK-NEXT: store ptr [[RES]], ptr [[TMP4]], align 8
|
|
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP5]], align 8
|
|
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
|
|
// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP6]], align 8
|
|
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
|
|
// CHECK-NEXT: store ptr [[X]], ptr [[TMP7]], align 8
|
|
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP8]], align 8
|
|
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
|
|
// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP9]], align 8
|
|
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
|
|
// CHECK-NEXT: store ptr [[Y]], ptr [[TMP10]], align 8
|
|
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8
|
|
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CHECK-NEXT: store i32 2, ptr [[TMP14]], align 4
|
|
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CHECK-NEXT: store i32 4, ptr [[TMP15]], align 4
|
|
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CHECK-NEXT: store ptr [[TMP12]], ptr [[TMP16]], align 8
|
|
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CHECK-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 8
|
|
// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP18]], align 8
|
|
// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP19]], align 8
|
|
// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP20]], align 8
|
|
// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8
|
|
// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CHECK-NEXT: store i64 0, ptr [[TMP22]], align 8
|
|
// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CHECK-NEXT: store i64 0, ptr [[TMP23]], align 8
|
|
// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CHECK-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP24]], align 4
|
|
// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP25]], align 4
|
|
// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CHECK-NEXT: store i32 0, ptr [[TMP26]], align 4
|
|
// CHECK-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1B3runEv_l25.region_id, ptr [[KERNEL_ARGS]])
|
|
// CHECK-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
|
|
// CHECK-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
|
// CHECK: omp_offload.failed:
|
|
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1B3runEv_l25(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
|
|
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
|
// CHECK: omp_offload.cont:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@_ZN1C3barER10descriptorIfE
|
|
// CHECK-SAME: (ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] comdat align 2 {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[ASIZE:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[CSIZE:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 8
|
|
// CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[CSIZE_CASTED:%.*]] = alloca i64, align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [2 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_PTRS10:%.*]] = alloca [2 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS11:%.*]] = alloca [2 x ptr], align 8
|
|
// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CHECK-NEXT: [[_TMP12:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[CSIZE_CASTED13:%.*]] = alloca i64, align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [3 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_PTRS19:%.*]] = alloca [3 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS20:%.*]] = alloca [3 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_SIZES21:%.*]] = alloca [3 x i64], align 8
|
|
// CHECK-NEXT: [[KERNEL_ARGS22:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
|
|
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[M:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR:%.*]], ptr [[TMP0]], i32 0, i32 3
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[M]], align 8
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[K:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP2]], i32 0, i32 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4
|
|
// CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP1]], [[TMP3]]
|
|
// CHECK-NEXT: store i32 [[MUL]], ptr [[ASIZE]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[M2:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP4]], i32 0, i32 3
|
|
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[M2]], align 8
|
|
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[N:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP6]], i32 0, i32 5
|
|
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 8
|
|
// CHECK-NEXT: [[MUL3:%.*]] = mul i32 [[TMP5]], [[TMP7]]
|
|
// CHECK-NEXT: store i32 [[MUL3]], ptr [[CSIZE]], align 4
|
|
// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP9]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP10]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[A4]], align 8
|
|
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 0
|
|
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ASIZE]], align 4
|
|
// CHECK-NEXT: [[CONV:%.*]] = zext i32 [[TMP12]] to i64
|
|
// CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[CONV]], 4
|
|
// CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP15]], i32 0, i32 1
|
|
// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[C5:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP16]], i32 0, i32 1
|
|
// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[C5]], align 8
|
|
// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 0
|
|
// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[CSIZE]], align 4
|
|
// CHECK-NEXT: [[CONV7:%.*]] = zext i32 [[TMP18]] to i64
|
|
// CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[CONV7]], 4
|
|
// CHECK-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[C]], i32 1
|
|
// CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
|
|
// CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[A]] to i64
|
|
// CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[TMP21]], [[TMP22]]
|
|
// CHECK-NEXT: [[TMP24:%.*]] = sdiv exact i64 [[TMP23]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
|
|
// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP25]], align 8
|
|
// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr [[A]], ptr [[TMP26]], align 8
|
|
// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
|
|
// CHECK-NEXT: store i64 [[TMP24]], ptr [[TMP27]], align 8
|
|
// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP28]], align 8
|
|
// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CHECK-NEXT: store ptr [[A]], ptr [[TMP29]], align 8
|
|
// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CHECK-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP30]], align 8
|
|
// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 1
|
|
// CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP31]], align 8
|
|
// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8
|
|
// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
|
|
// CHECK-NEXT: store ptr [[C]], ptr [[TMP33]], align 8
|
|
// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
|
|
// CHECK-NEXT: store ptr [[ARRAYIDX6]], ptr [[TMP34]], align 8
|
|
// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 2
|
|
// CHECK-NEXT: store i64 [[TMP19]], ptr [[TMP35]], align 8
|
|
// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP36]], align 8
|
|
// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
|
|
// CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 3, ptr [[TMP37]], ptr [[TMP38]], ptr [[TMP39]], ptr @.offload_maptypes.1, ptr null, ptr null)
|
|
// CHECK-NEXT: [[TMP40:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[TMP40]], ptr [[TMP]], align 8
|
|
// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP]], align 8
|
|
// CHECK-NEXT: store ptr [[TMP41]], ptr [[_TMP8]], align 8
|
|
// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[CSIZE]], align 4
|
|
// CHECK-NEXT: store i32 [[TMP42]], ptr [[CSIZE_CASTED]], align 4
|
|
// CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[CSIZE_CASTED]], align 8
|
|
// CHECK-NEXT: [[TMP44:%.*]] = load ptr, ptr [[_TMP8]], align 8
|
|
// CHECK-NEXT: [[TMP45:%.*]] = load ptr, ptr [[_TMP8]], align 8
|
|
// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[_TMP8]], align 8
|
|
// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS9]], i32 0, i32 0
|
|
// CHECK-NEXT: store i64 [[TMP43]], ptr [[TMP47]], align 8
|
|
// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS10]], i32 0, i32 0
|
|
// CHECK-NEXT: store i64 [[TMP43]], ptr [[TMP48]], align 8
|
|
// CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS11]], i64 0, i64 0
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP49]], align 8
|
|
// CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS9]], i32 0, i32 1
|
|
// CHECK-NEXT: store ptr [[TMP45]], ptr [[TMP50]], align 8
|
|
// CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS10]], i32 0, i32 1
|
|
// CHECK-NEXT: store ptr [[TMP46]], ptr [[TMP51]], align 8
|
|
// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS11]], i64 0, i64 1
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP52]], align 8
|
|
// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS9]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS10]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CHECK-NEXT: store i32 2, ptr [[TMP55]], align 4
|
|
// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CHECK-NEXT: store i32 2, ptr [[TMP56]], align 4
|
|
// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CHECK-NEXT: store ptr [[TMP53]], ptr [[TMP57]], align 8
|
|
// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CHECK-NEXT: store ptr [[TMP54]], ptr [[TMP58]], align 8
|
|
// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CHECK-NEXT: store ptr @.offload_sizes.2, ptr [[TMP59]], align 8
|
|
// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CHECK-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP60]], align 8
|
|
// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP61]], align 8
|
|
// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP62]], align 8
|
|
// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CHECK-NEXT: store i64 0, ptr [[TMP63]], align 8
|
|
// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CHECK-NEXT: store i64 0, ptr [[TMP64]], align 8
|
|
// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP65]], align 4
|
|
// CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP66]], align 4
|
|
// CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CHECK-NEXT: store i32 0, ptr [[TMP67]], align 4
|
|
// CHECK-NEXT: [[TMP68:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1C3barER10descriptorIfE_l55.region_id, ptr [[KERNEL_ARGS]])
|
|
// CHECK-NEXT: [[TMP69:%.*]] = icmp ne i32 [[TMP68]], 0
|
|
// CHECK-NEXT: br i1 [[TMP69]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
|
// CHECK: omp_offload.failed:
|
|
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1C3barER10descriptorIfE_l55(i64 [[TMP43]], ptr [[TMP44]]) #[[ATTR3]]
|
|
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
|
// CHECK: omp_offload.cont:
|
|
// CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
|
|
// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 3, ptr [[TMP70]], ptr [[TMP71]], ptr [[TMP72]], ptr @.offload_maptypes.1, ptr null, ptr null)
|
|
// CHECK-NEXT: [[TMP73:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[TMP73]], ptr [[_TMP12]], align 8
|
|
// CHECK-NEXT: [[TMP74:%.*]] = load i32, ptr [[CSIZE]], align 4
|
|
// CHECK-NEXT: store i32 [[TMP74]], ptr [[CSIZE_CASTED13]], align 4
|
|
// CHECK-NEXT: [[TMP75:%.*]] = load i64, ptr [[CSIZE_CASTED13]], align 8
|
|
// CHECK-NEXT: [[TMP76:%.*]] = load ptr, ptr [[_TMP12]], align 8
|
|
// CHECK-NEXT: [[TMP77:%.*]] = load ptr, ptr [[_TMP12]], align 8
|
|
// CHECK-NEXT: [[TMP78:%.*]] = load ptr, ptr [[_TMP12]], align 8
|
|
// CHECK-NEXT: [[C14:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP78]], i32 0, i32 1
|
|
// CHECK-NEXT: [[TMP79:%.*]] = load ptr, ptr [[_TMP12]], align 8
|
|
// CHECK-NEXT: [[C15:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP79]], i32 0, i32 1
|
|
// CHECK-NEXT: [[TMP80:%.*]] = load ptr, ptr [[C15]], align 8
|
|
// CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 0
|
|
// CHECK-NEXT: [[TMP81:%.*]] = load i32, ptr [[CSIZE]], align 4
|
|
// CHECK-NEXT: [[CONV17:%.*]] = zext i32 [[TMP81]] to i64
|
|
// CHECK-NEXT: [[TMP82:%.*]] = mul nuw i64 [[CONV17]], 4
|
|
// CHECK-NEXT: [[TMP83:%.*]] = getelementptr ptr, ptr [[C14]], i32 1
|
|
// CHECK-NEXT: [[TMP84:%.*]] = ptrtoint ptr [[TMP83]] to i64
|
|
// CHECK-NEXT: [[TMP85:%.*]] = ptrtoint ptr [[C14]] to i64
|
|
// CHECK-NEXT: [[TMP86:%.*]] = sub i64 [[TMP84]], [[TMP85]]
|
|
// CHECK-NEXT: [[TMP87:%.*]] = sdiv exact i64 [[TMP86]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
|
|
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES21]], ptr align 8 @.offload_sizes.4, i64 24, i1 false)
|
|
// CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
|
|
// CHECK-NEXT: store i64 [[TMP75]], ptr [[TMP88]], align 8
|
|
// CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
|
|
// CHECK-NEXT: store i64 [[TMP75]], ptr [[TMP89]], align 8
|
|
// CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS20]], i64 0, i64 0
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP90]], align 8
|
|
// CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
|
|
// CHECK-NEXT: store ptr [[TMP77]], ptr [[TMP91]], align 8
|
|
// CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
|
|
// CHECK-NEXT: store ptr [[C14]], ptr [[TMP92]], align 8
|
|
// CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
|
|
// CHECK-NEXT: store i64 [[TMP87]], ptr [[TMP93]], align 8
|
|
// CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS20]], i64 0, i64 1
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP94]], align 8
|
|
// CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
|
|
// CHECK-NEXT: store ptr [[C14]], ptr [[TMP95]], align 8
|
|
// CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
|
|
// CHECK-NEXT: store ptr [[ARRAYIDX16]], ptr [[TMP96]], align 8
|
|
// CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
|
|
// CHECK-NEXT: store i64 [[TMP82]], ptr [[TMP97]], align 8
|
|
// CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS20]], i64 0, i64 2
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP98]], align 8
|
|
// CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP101:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 0
|
|
// CHECK-NEXT: store i32 2, ptr [[TMP102]], align 4
|
|
// CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 1
|
|
// CHECK-NEXT: store i32 3, ptr [[TMP103]], align 4
|
|
// CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 2
|
|
// CHECK-NEXT: store ptr [[TMP99]], ptr [[TMP104]], align 8
|
|
// CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 3
|
|
// CHECK-NEXT: store ptr [[TMP100]], ptr [[TMP105]], align 8
|
|
// CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 4
|
|
// CHECK-NEXT: store ptr [[TMP101]], ptr [[TMP106]], align 8
|
|
// CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 5
|
|
// CHECK-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP107]], align 8
|
|
// CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 6
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP108]], align 8
|
|
// CHECK-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 7
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP109]], align 8
|
|
// CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 8
|
|
// CHECK-NEXT: store i64 0, ptr [[TMP110]], align 8
|
|
// CHECK-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 9
|
|
// CHECK-NEXT: store i64 0, ptr [[TMP111]], align 8
|
|
// CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 10
|
|
// CHECK-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP112]], align 4
|
|
// CHECK-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 11
|
|
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP113]], align 4
|
|
// CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 12
|
|
// CHECK-NEXT: store i32 0, ptr [[TMP114]], align 4
|
|
// CHECK-NEXT: [[TMP115:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1C3barER10descriptorIfE_l59.region_id, ptr [[KERNEL_ARGS22]])
|
|
// CHECK-NEXT: [[TMP116:%.*]] = icmp ne i32 [[TMP115]], 0
|
|
// CHECK-NEXT: br i1 [[TMP116]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
|
|
// CHECK: omp_offload.failed23:
|
|
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1C3barER10descriptorIfE_l59(i64 [[TMP75]], ptr [[TMP76]]) #[[ATTR3]]
|
|
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT24]]
|
|
// CHECK: omp_offload.cont24:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@_ZN1BC2Eii
|
|
// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(12) [[THIS:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
|
|
// CHECK-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4
|
|
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4
|
|
// CHECK-NEXT: call void @_ZN1AC2Eii(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]], i32 noundef [[TMP1]])
|
|
// CHECK-NEXT: [[RES:%.*]] = getelementptr inbounds [[CLASS_B:%.*]], ptr [[THIS1]], i32 0, i32 1
|
|
// CHECK-NEXT: store i32 0, ptr [[RES]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@_ZN1AC2Eii
|
|
// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
|
|
// CHECK-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4
|
|
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[CLASS_A:%.*]], ptr [[THIS1]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
|
|
// CHECK-NEXT: store i32 [[TMP0]], ptr [[X]], align 4
|
|
// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[CLASS_A]], ptr [[THIS1]], i32 0, i32 1
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4
|
|
// CHECK-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1B3runEv_l25
|
|
// CHECK-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
|
|
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[CLASS_A:%.*]], ptr [[TMP0]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
|
|
// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[CLASS_A]], ptr [[TMP0]], i32 0, i32 1
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
|
|
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
|
|
// CHECK-NEXT: [[RES:%.*]] = getelementptr inbounds [[CLASS_B:%.*]], ptr [[TMP0]], i32 0, i32 1
|
|
// CHECK-NEXT: store i32 [[ADD]], ptr [[RES]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1C3barER10descriptorIfE_l55
|
|
// CHECK-SAME: (i64 noundef [[CSIZE:%.*]], ptr noundef nonnull align 8 dereferenceable(40) [[D:%.*]]) #[[ATTR2]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[CSIZE_ADDR:%.*]] = alloca i64, align 8
|
|
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[CSIZE_CASTED:%.*]] = alloca i64, align 8
|
|
// CHECK-NEXT: store i64 [[CSIZE]], ptr [[CSIZE_ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSIZE_ADDR]], align 4
|
|
// CHECK-NEXT: store i32 [[TMP1]], ptr [[CSIZE_CASTED]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[CSIZE_CASTED]], align 8
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8
|
|
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1C3barER10descriptorIfE_l55.omp_outlined, i64 [[TMP2]], ptr [[TMP3]])
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1C3barER10descriptorIfE_l55.omp_outlined
|
|
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[CSIZE:%.*]], ptr noundef nonnull align 8 dereferenceable(40) [[D:%.*]]) #[[ATTR4:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[CSIZE_ADDR:%.*]] = alloca i64, align 8
|
|
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK-NEXT: store i64 [[CSIZE]], ptr [[CSIZE_ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
|
|
// CHECK-NEXT: store i32 0, ptr [[I]], align 4
|
|
// CHECK-NEXT: br label [[FOR_COND:%.*]]
|
|
// CHECK: for.cond:
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSIZE_ADDR]], align 4
|
|
// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], [[TMP2]]
|
|
// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
|
// CHECK: for.body:
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8
|
|
// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR:%.*]], ptr [[TMP3]], i32 0, i32 1
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C]], align 8
|
|
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64
|
|
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[IDXPROM]]
|
|
// CHECK-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
|
|
// CHECK-NEXT: br label [[FOR_INC:%.*]]
|
|
// CHECK: for.inc:
|
|
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1
|
|
// CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4
|
|
// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
// CHECK: for.end:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1C3barER10descriptorIfE_l59
|
|
// CHECK-SAME: (i64 noundef [[CSIZE:%.*]], ptr noundef nonnull align 8 dereferenceable(40) [[D:%.*]]) #[[ATTR2]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[CSIZE_ADDR:%.*]] = alloca i64, align 8
|
|
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: store i64 [[CSIZE]], ptr [[CSIZE_ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
|
|
// CHECK-NEXT: store i32 0, ptr [[I]], align 4
|
|
// CHECK-NEXT: br label [[FOR_COND:%.*]]
|
|
// CHECK: for.cond:
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSIZE_ADDR]], align 4
|
|
// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], [[TMP2]]
|
|
// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
|
// CHECK: for.body:
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8
|
|
// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR:%.*]], ptr [[TMP3]], i32 0, i32 1
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C]], align 8
|
|
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64
|
|
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[IDXPROM]]
|
|
// CHECK-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
|
|
// CHECK-NEXT: br label [[FOR_INC:%.*]]
|
|
// CHECK: for.inc:
|
|
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1
|
|
// CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4
|
|
// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
// CHECK: for.end:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
|
|
// CHECK-SAME: () #[[ATTR6:[0-9]+]] section ".text.startup" {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: call void @__tgt_register_requires(i64 1)
|
|
// CHECK-NEXT: ret void
|
|
//
|