Summary: We use this `dyn_ptr` argument in Clang/OpenMP to handle the `KernelLaunchEnvironment`. This is a per-kernel argument used to share some information. Currently, it is prepended to the argument list and we generate storage for it in the runtime. This is bad for a few reasons: 1. It changes the ABI by shifting user arguments. 2. It cannot trivially be left uninitialized if unused. 3. The runtime must allocate its own memory for it. This PR changes it to be appended instead. Additionally, space for this is always emitted. This means the OMPIRBuilder itself will provide the storage; we simply need to populate it in the runtime if it is used. This means that if it's unused we don't always pay the cost, and it's easier for non-OpenMP users to ignore it. Backward compatibility is maintained by auto-upgrading the kernel arguments. In `libomptarget` we allocate a completely new buffer to store this in the new format. The plugins still need to respect the old ABI of the called device object, so we simply rotate it if it's the old version.
2191 lines
158 KiB
C++
2191 lines
158 KiB
C++
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --global-value-regex "\.offload_.*"
|
|
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
|
|
// RUN: -triple powerpc64le-unknown-unknown -DCUDA \
|
|
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
|
|
// RUN: %t-ppc-host.bc
|
|
|
|
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
|
|
// RUN: -triple nvptx64-unknown-unknown -DCUA \
|
|
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -DCUDA -emit-llvm %s \
|
|
// RUN: -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
|
|
// RUN: -o - | FileCheck %s --check-prefix CHECK
|
|
|
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ \
|
|
// RUN: -triple powerpc64le-unknown-unknown -DDIAG\
|
|
// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm \
|
|
// RUN: %s -o - | FileCheck %s \
|
|
// RUN: --check-prefix=CHECK1
|
|
|
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ \
|
|
// RUN: -triple i386-unknown-unknown \
|
|
// RUN: -fopenmp-targets=i386-pc-linux-gnu -emit-llvm \
|
|
// RUN: %s -o - | FileCheck %s \
|
|
// RUN: --check-prefix=CHECK2
|
|
|
|
|
|
#if defined(CUDA)
|
|
// expected-no-diagnostics
|
|
|
|
int foo(int n) { // Fixture for the CUDA branch: array-section reduction on a raw pointer. `n` is unused; always returns 0.
|
|
double *e; // Never assigned in this function; only named in the reduction clause and written through below.
|
|
// Expected: no error, and no implicit map is generated for e[:1].
|
|
#pragma omp target parallel reduction(+: e[:1])
|
|
*e=10; // The statement the target parallel directive applies to.
|
|
; // Empty statement following the target region.
|
|
return 0;
|
|
}
|
|
// CHECK-NOT: @.offload_maptypes
|
|
#elif defined(DIAG)
|
|
class S2 { // Helper type for bar(); it has a user-provided copy constructor, and bar() expects a "not trivially copyable" warning for it.
|
|
mutable int a; // Only data member; private because it precedes `public:`.
|
|
public:
|
|
S2():a(0) { } // Default constructor: initializes a to 0.
|
|
S2(S2 &s2):a(s2.a) { } // Copy constructor taking a non-const reference; copies a.
|
|
S2 &operator +(S2 &s); // Declared only; no definition is visible in this part of the file.
|
|
};
|
|
int bar() { // Fixture for the DIAG branch: two target regions with reductions; always returns 0.
|
|
S2 o[5];
|
|
// Expected: warning "type 'S2' is not trivially copyable and not guaranteed to
|
|
// be mapped correctly", and an implicit map is generated.
|
|
#pragma omp target parallel reduction(+:o[0]) //expected-warning {{type 'S2' is not trivially copyable and not guaranteed to be mapped correctly}}
|
|
for (int i = 0; i < 10; i++); // Empty-bodied loop; the statement the directive above applies to.
|
|
double b[10][10][10];
|
|
// Expected: no error; the map for b itself is generated, but no implicit map is
|
|
// generated for the reduction section b[0:2][2:4][1].
|
|
#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])
|
|
for (long long i = 0; i < 10; ++i); // Empty-bodied loop associated with the `parallel for` above.
|
|
return 0;
|
|
}
|
|
// map for variable o
|
|
// map for b:
|
|
#else
|
|
// expected-no-diagnostics
|
|
|
|
// Implicit maps are generated for array elements and array sections named in a
|
|
// reduction clause. In the following cases the implicit map is generated for
|
|
// output[0] with map size 4 and for output[:3] with map size 12.
|
|
void sum(int* input, int size, int* output) // Fixture for the default branch (neither CUDA nor DIAG defined): four target regions with reductions.
|
|
{
|
|
#pragma omp target teams distribute parallel for reduction(+: output[0]) \
|
|
map(to: input [0:size])
|
|
for (int i = 0; i < size; i++)
|
|
output[0] += input[i]; // Accumulates into the reduced element output[0].
|
|
#pragma omp target teams distribute parallel for reduction(+: output[:3]) \
|
|
map(to: input [0:size])
|
|
for (int i = 0; i < size; i++)
|
|
output[0] += input[i]; // Same loop body as above; only the reduction item differs (section output[:3]).
|
|
int a[10]; // Local array named in the a[:2] and a[3] reductions below; never assigned in this function.
|
|
#pragma omp target parallel reduction(+: a[:2])
|
|
for (int i = 0; i < size; i++)
|
|
; // Empty loop body.
|
|
#pragma omp target parallel reduction(+: a[3])
|
|
for (int i = 0; i < size; i++)
|
|
; // Empty loop body.
|
|
}
|
|
#endif
|
|
int main() // Calls whichever fixture the active preprocessor branch defines; always returns 0.
|
|
{
|
|
#if defined(CUDA)
|
|
int a = foo(10); // Return value is stored but not used afterwards.
|
|
#elif defined(DIAG)
|
|
int a = bar(); // Return value is stored but not used afterwards.
|
|
#else
|
|
const int size = 100;
|
|
int *array = new int[size]; // Elements are not initialized, and the buffer is not released in this function.
|
|
int result = 0;
|
|
sum(array, size, &result); // &result is passed as sum()'s `output` pointer.
|
|
#endif
|
|
return 0;
|
|
}
|
|
//.
|
|
// CHECK1: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 4, i64 0]
|
|
// CHECK1: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 547, i64 288]
|
|
// CHECK1: @.offload_sizes.1 = private unnamed_addr constant [2 x i64] [i64 8000, i64 0]
|
|
// CHECK1: @.offload_maptypes.2 = private unnamed_addr constant [2 x i64] [i64 547, i64 288]
|
|
//.
|
|
// CHECK2: @.offload_sizes = private unnamed_addr constant [6 x i64] [i64 4, i64 4, i64 4, i64 0, i64 4, i64 0]
|
|
// CHECK2: @.offload_maptypes = private unnamed_addr constant [6 x i64] [i64 800, i64 547, i64 16384, i64 33, i64 16384, i64 288]
|
|
// CHECK2: @.offload_sizes.1 = private unnamed_addr constant [6 x i64] [i64 4, i64 12, i64 4, i64 0, i64 4, i64 0]
|
|
// CHECK2: @.offload_maptypes.2 = private unnamed_addr constant [6 x i64] [i64 800, i64 547, i64 16384, i64 33, i64 16384, i64 288]
|
|
// CHECK2: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 0]
|
|
// CHECK2: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 800, i64 547, i64 288]
|
|
// CHECK2: @.offload_sizes.5 = private unnamed_addr constant [3 x i64] [i64 4, i64 4, i64 0]
|
|
// CHECK2: @.offload_maptypes.6 = private unnamed_addr constant [3 x i64] [i64 800, i64 547, i64 288]
|
|
//.
|
|
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32
|
|
// CHECK-SAME: (ptr noundef [[E:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_kernel_environment, ptr [[DYN_PTR]])
|
|
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
|
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
|
// CHECK: user_code.entry:
|
|
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
|
|
// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8
|
|
// CHECK-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1, i32 0)
|
|
// CHECK-NEXT: call void @__kmpc_target_deinit()
|
|
// CHECK-NEXT: ret void
|
|
// CHECK: worker.exit:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined
|
|
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[E:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[E2:%.*]] = alloca double, align 8
|
|
// CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
|
|
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i64 0
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8
|
|
// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i64 0
|
|
// CHECK-NEXT: store double 0.000000e+00, ptr [[E2]], align 8
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8
|
|
// CHECK-NEXT: [[TMP3:%.*]] = ptrtoaddr ptr [[TMP2]] to i64
|
|
// CHECK-NEXT: [[TMP4:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i64
|
|
// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
|
|
// CHECK-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], 8
|
|
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[E2]], i64 [[TMP6]]
|
|
// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP]], align 8
|
|
// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8
|
|
// CHECK-NEXT: store double 1.000000e+01, ptr [[TMP8]], align 8
|
|
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
|
|
// CHECK-NEXT: store ptr [[E2]], ptr [[TMP9]], align 8
|
|
// CHECK-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
|
|
// CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1
|
|
// CHECK-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
|
|
// CHECK: .omp.reduction.then:
|
|
// CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX]], align 8
|
|
// CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[E2]], align 8
|
|
// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]]
|
|
// CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX]], align 8
|
|
// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
|
|
// CHECK: .omp.reduction.done:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
|
|
// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
|
|
// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
|
|
// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
|
|
// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8
|
|
// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
|
|
// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
|
|
// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
|
|
// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
|
|
// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
|
|
// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
|
|
// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
|
|
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
|
|
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i64 1
|
|
// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
|
|
// CHECK-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
|
|
// CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
|
|
// CHECK-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
|
|
// CHECK-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
|
|
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1
|
|
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
|
|
// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
|
|
// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
|
|
// CHECK-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
|
|
// CHECK-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
|
|
// CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]]
|
|
// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2
|
|
// CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1
|
|
// CHECK-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0
|
|
// CHECK-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]]
|
|
// CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0
|
|
// CHECK-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]]
|
|
// CHECK-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]]
|
|
// CHECK-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
|
|
// CHECK-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
|
|
// CHECK: then:
|
|
// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3:[0-9]+]]
|
|
// CHECK-NEXT: br label [[IFCONT:%.*]]
|
|
// CHECK: else:
|
|
// CHECK-NEXT: br label [[IFCONT]]
|
|
// CHECK: ifcont:
|
|
// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1
|
|
// CHECK-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
|
|
// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
|
|
// CHECK-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
|
|
// CHECK: then4:
|
|
// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
|
|
// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8
|
|
// CHECK-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8
|
|
// CHECK-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8
|
|
// CHECK-NEXT: br label [[IFCONT6:%.*]]
|
|
// CHECK: else5:
|
|
// CHECK-NEXT: br label [[IFCONT6]]
|
|
// CHECK: ifcont6:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
|
|
// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
|
|
// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
|
|
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
|
|
// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
|
|
// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
|
|
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
|
|
// CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
|
|
// CHECK-NEXT: br label [[PRECOND:%.*]]
|
|
// CHECK: precond:
|
|
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
|
|
// CHECK-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
|
|
// CHECK: body:
|
|
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
|
|
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
|
|
// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
|
|
// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
|
|
// CHECK: then:
|
|
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
|
|
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]]
|
|
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
|
|
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
|
|
// CHECK-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
|
|
// CHECK-NEXT: br label [[IFCONT:%.*]]
|
|
// CHECK: else:
|
|
// CHECK-NEXT: br label [[IFCONT]]
|
|
// CHECK: ifcont:
|
|
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
|
|
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
|
|
// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
|
|
// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
|
|
// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
|
|
// CHECK: then3:
|
|
// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
|
|
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
|
|
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]]
|
|
// CHECK-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
|
|
// CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4
|
|
// CHECK-NEXT: br label [[IFCONT5:%.*]]
|
|
// CHECK: else4:
|
|
// CHECK-NEXT: br label [[IFCONT5]]
|
|
// CHECK: ifcont5:
|
|
// CHECK-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
|
|
// CHECK-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR]], align 4
|
|
// CHECK-NEXT: br label [[PRECOND]]
|
|
// CHECK: exit:
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@_Z3barv
|
|
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[O:%.*]] = alloca [5 x [[CLASS_S2:%.*]]], align 4
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
|
|
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CHECK1-NEXT: [[B:%.*]] = alloca [10 x [10 x [10 x double]]], align 8
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [2 x ptr], align 8
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [2 x ptr], align 8
|
|
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [2 x ptr], align 8
|
|
// CHECK1-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
|
|
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x [[CLASS_S2]]], ptr [[O]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_S2]], ptr [[ARRAY_BEGIN]], i64 5
|
|
// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
|
|
// CHECK1: arrayctor.loop:
|
|
// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
|
|
// CHECK1-NEXT: call void @_ZN2S2C1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
|
|
// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_S2]], ptr [[ARRAYCTOR_CUR]], i64 1
|
|
// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
|
|
// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
|
|
// CHECK1: arrayctor.cont:
|
|
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x [[CLASS_S2]]], ptr [[O]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK1-NEXT: store ptr [[O]], ptr [[TMP0]], align 8
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK1-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP1]], align 8
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 8
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 8
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CHECK1-NEXT: store i64 0, ptr [[TMP17]], align 8
|
|
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP18]], align 4
|
|
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4
|
|
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CHECK1-NEXT: store i32 0, ptr [[TMP20]], align 4
|
|
// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.region_id, ptr [[KERNEL_ARGS]])
|
|
// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
|
|
// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
|
// CHECK1: omp_offload.failed:
|
|
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50(ptr [[O]], ptr null) #[[ATTR6:[0-9]+]]
|
|
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
|
// CHECK1: omp_offload.cont:
|
|
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
|
|
// CHECK1-NEXT: store ptr [[B]], ptr [[TMP23]], align 8
|
|
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
|
|
// CHECK1-NEXT: store ptr [[B]], ptr [[TMP24]], align 8
|
|
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP25]], align 8
|
|
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8
|
|
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8
|
|
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8
|
|
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
|
|
// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4
|
|
// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
|
|
// CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4
|
|
// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
|
|
// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP33]], align 8
|
|
// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
|
|
// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP34]], align 8
|
|
// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
|
|
// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP35]], align 8
|
|
// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
|
|
// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP36]], align 8
|
|
// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP37]], align 8
|
|
// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP38]], align 8
|
|
// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
|
|
// CHECK1-NEXT: store i64 0, ptr [[TMP39]], align 8
|
|
// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9
|
|
// CHECK1-NEXT: store i64 0, ptr [[TMP40]], align 8
|
|
// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10
|
|
// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP41]], align 4
|
|
// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11
|
|
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP42]], align 4
|
|
// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12
|
|
// CHECK1-NEXT: store i32 0, ptr [[TMP43]], align 4
|
|
// CHECK1-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.region_id, ptr [[KERNEL_ARGS4]])
|
|
// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
|
|
// CHECK1-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
|
|
// CHECK1: omp_offload.failed5:
|
|
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55(ptr [[B]], ptr null) #[[ATTR6]]
|
|
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT6]]
|
|
// CHECK1: omp_offload.cont6:
|
|
// CHECK1-NEXT: ret i32 0
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2C1Ev
|
|
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
|
|
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
|
|
// CHECK1-NEXT: call void @_ZN2S2C2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]])
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50
|
|
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(20) [[O:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8
|
|
// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
|
|
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.omp_outlined, ptr [[TMP0]])
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.omp_outlined
|
|
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR1]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[O1:%.*]] = alloca [[CLASS_S2:%.*]], align 4
|
|
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
|
|
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x [[CLASS_S2]]], ptr [[TMP0]], i64 0, i64 0
|
|
// CHECK1-NEXT: call void @_ZN2S2C1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[O1]])
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = ptrtoaddr ptr [[TMP0]] to i64
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i64
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], 4
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[CLASS_S2]], ptr [[O1]], i64 [[TMP4]]
|
|
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
|
|
// CHECK1-NEXT: br label [[FOR_COND:%.*]]
|
|
// CHECK1: for.cond:
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], 10
|
|
// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
|
// CHECK1: for.body:
|
|
// CHECK1-NEXT: br label [[FOR_INC:%.*]]
|
|
// CHECK1: for.inc:
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
|
|
// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4
|
|
// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
// CHECK1: for.end:
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
|
|
// CHECK1-NEXT: store ptr [[O1]], ptr [[TMP8]], align 8
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK1-NEXT: switch i32 [[TMP11]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK1-NEXT: ]
|
|
// CHECK1: .omp.reduction.case1:
|
|
// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O1]])
|
|
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX]], ptr align 4 [[CALL]], i64 4, i1 false)
|
|
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK1: .omp.reduction.case2:
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
|
|
// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var)
|
|
// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O1]])
|
|
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX]], ptr align 4 [[CALL2]], i64 4, i1 false)
|
|
// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var)
|
|
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK1: .omp.reduction.default:
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.omp_outlined.omp.reduction.reduction_func
|
|
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
|
|
// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]])
|
|
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL]], i64 4, i1 false)
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55
|
|
// CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(8000) [[B:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META12:![0-9]+]]
|
|
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.omp_outlined, ptr [[TMP0]])
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.omp_outlined
|
|
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR1]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [1 x [[STRUCT_KMP_TASKRED_INPUT_T:%.*]]], align 8
|
|
// CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8
|
|
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8
|
|
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META12]]
|
|
// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
|
|
// CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8
|
|
// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
|
|
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY]], i64 2
|
|
// CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX1]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY2]], i64 1
|
|
// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1
|
|
// CHECK1-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX4]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY5]], i64 5
|
|
// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX6]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY7]], i64 1
|
|
// CHECK1-NEXT: [[TMP1:%.*]] = ptrtoaddr ptr [[ARRAYIDX8]] to i64
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoaddr ptr [[ARRAYIDX3]] to i64
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], 8
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = add nuw i64 [[TMP4]], 1
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64)
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave.p0()
|
|
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8
|
|
// CHECK1-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP5]], align 8
|
|
// CHECK1-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR0]], align 8
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP5]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP8]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
|
// CHECK1: omp.arrayinit.body:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
|
// CHECK1-NEXT: store double 0.000000e+00, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
|
// CHECK1: omp.arrayinit.done:
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoaddr ptr [[TMP0]] to i64
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoaddr ptr [[ARRAYIDX3]] to i64
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]]
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], 8
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP12]]
|
|
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [1 x [[STRUCT_KMP_TASKRED_INPUT_T]]], ptr [[DOTRD_INPUT_]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
|
|
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX9]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY10]], i64 2
|
|
// CHECK1-NEXT: [[ARRAYDECAY12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY12]], i64 1
|
|
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1
|
|
// CHECK1-NEXT: [[ARRAYDECAY15:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX14]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY15]], i64 5
|
|
// CHECK1-NEXT: [[ARRAYDECAY17:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX16]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY17]], i64 1
|
|
// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP14]], align 8
|
|
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
|
|
// CHECK1-NEXT: store ptr [[ARRAYIDX13]], ptr [[TMP15]], align 8
|
|
// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoaddr ptr [[ARRAYIDX18]] to i64
|
|
// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoaddr ptr [[ARRAYIDX13]] to i64
|
|
// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]]
|
|
// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], 8
|
|
// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1
|
|
// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64)
|
|
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
|
|
// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP22]], align 8
|
|
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
|
|
// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP23]], align 8
|
|
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
|
|
// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8
|
|
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
|
|
// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP25]], align 8
|
|
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
|
|
// CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 8
|
|
// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
|
|
// CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i32 1, ptr [[DOTRD_INPUT_]])
|
|
// CHECK1-NEXT: store ptr [[TMP29]], ptr [[DOTTASK_RED_]], align 8
|
|
// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
|
|
// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP31]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
|
|
// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
|
|
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP32]], 9
|
|
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
// CHECK1: cond.true:
|
|
// CHECK1-NEXT: br label [[COND_END:%.*]]
|
|
// CHECK1: cond.false:
|
|
// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
|
|
// CHECK1-NEXT: br label [[COND_END]]
|
|
// CHECK1: cond.end:
|
|
// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP33]], [[COND_FALSE]] ]
|
|
// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
|
|
// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
|
|
// CHECK1-NEXT: store i64 [[TMP34]], ptr [[DOTOMP_IV]], align 8
|
|
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK1: omp.inner.for.cond:
|
|
// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
|
|
// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
|
|
// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP35]], [[TMP36]]
|
|
// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
|
|
// CHECK1: omp.inner.for.cond.cleanup:
|
|
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK1: omp.inner.for.body:
|
|
// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
|
|
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP37]], 1
|
|
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]]
|
|
// CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8
|
|
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// CHECK1: omp.body.continue:
|
|
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK1: omp.inner.for.inc:
|
|
// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
|
|
// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP38]], 1
|
|
// CHECK1-NEXT: store i64 [[ADD20]], ptr [[DOTOMP_IV]], align 8
|
|
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
|
|
// CHECK1: omp.inner.for.end:
|
|
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
|
// CHECK1: omp.loop.exit:
|
|
// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4
|
|
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP40]])
|
|
// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4
|
|
// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB2]], i32 [[TMP42]], i32 1)
|
|
// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
|
|
// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP43]], align 8
|
|
// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
|
|
// CHECK1-NEXT: [[TMP45:%.*]] = inttoptr i64 [[TMP5]] to ptr
|
|
// CHECK1-NEXT: store ptr [[TMP45]], ptr [[TMP44]], align 8
|
|
// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4
|
|
// CHECK1-NEXT: [[TMP48:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP47]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK1-NEXT: switch i32 [[TMP48]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK1-NEXT: ]
|
|
// CHECK1: .omp.reduction.case1:
|
|
// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP5]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP49]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK1: omp.arraycpy.body:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[TMP50:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8
|
|
// CHECK1-NEXT: [[TMP51:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[ADD22:%.*]] = fadd double [[TMP50]], [[TMP51]]
|
|
// CHECK1-NEXT: store double [[ADD22]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP49]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK1: omp.arraycpy.done25:
|
|
// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP47]], ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK1: .omp.reduction.case2:
|
|
// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP5]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP52]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE33:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]]
|
|
// CHECK1: omp.arraycpy.body27:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY27]] ]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY27]] ]
|
|
// CHECK1-NEXT: [[TMP53:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8
|
|
// CHECK1-NEXT: [[TMP54:%.*]] = atomicrmw fadd ptr [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP53]] monotonic, align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT30]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT31]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP52]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_BODY27]]
|
|
// CHECK1: omp.arraycpy.done33:
|
|
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK1: .omp.reduction.default:
|
|
// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
|
|
// CHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP55]])
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@.red_init.
|
|
// CHECK1-SAME: (ptr noalias noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR2]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}})
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i64 [[TMP4]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
|
// CHECK1: omp.arrayinit.body:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
|
// CHECK1-NEXT: store double 0.000000e+00, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
|
// CHECK1: omp.arrayinit.done:
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@.red_comb.
|
|
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR2]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}})
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP4]], i64 [[TMP3]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP6]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK1: omp.arraycpy.body:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP7]], [[TMP8]]
|
|
// CHECK1-NEXT: store double [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK1: omp.arraycpy.done2:
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.omp_outlined.omp.reduction.reduction_func
|
|
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR2]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
|
|
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
|
|
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
|
|
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0
|
|
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
|
|
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1
|
|
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
|
|
// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
|
|
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP7]], i64 [[TMP10]]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP11]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK1: omp.arraycpy.body:
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]]
|
|
// CHECK1-NEXT: store double [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]]
|
|
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK1: omp.arraycpy.done2:
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2C2Ev
|
|
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
|
|
// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
|
|
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
|
|
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[CLASS_S2:%.*]], ptr [[THIS1]], i32 0, i32 0
|
|
// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
|
|
// CHECK1-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK1-LABEL: define {{[^@]+}}@main
|
|
// CHECK1-SAME: () #[[ATTR9:[0-9]+]] {
|
|
// CHECK1-NEXT: entry:
|
|
// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
|
|
// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
|
|
// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3barv()
|
|
// CHECK1-NEXT: store i32 [[CALL]], ptr [[A]], align 4
|
|
// CHECK1-NEXT: ret i32 0
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@_Z3sumPiiS_
|
|
// CHECK2-SAME: (ptr noundef [[INPUT:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [6 x ptr], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [6 x ptr], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [6 x ptr], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [6 x i64], align 4
|
|
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
|
// CHECK2-NEXT: [[SIZE_CASTED4:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [6 x ptr], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [6 x ptr], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [6 x ptr], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_SIZES10:%.*]] = alloca [6 x i64], align 4
|
|
// CHECK2-NEXT: [[_TMP11:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
|
|
// CHECK2-NEXT: [[A:%.*]] = alloca [10 x i32], align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED21:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [3 x ptr], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS24:%.*]] = alloca [3 x ptr], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [3 x ptr], align 4
|
|
// CHECK2-NEXT: [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
|
|
// CHECK2-NEXT: [[SIZE_CASTED29:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS31:%.*]] = alloca [3 x ptr], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_PTRS32:%.*]] = alloca [3 x ptr], align 4
|
|
// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS33:%.*]] = alloca [3 x ptr], align 4
|
|
// CHECK2-NEXT: [[KERNEL_ARGS34:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
|
|
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 0
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
|
|
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 48, i1 false)
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP12]], align 4
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP13]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 4
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP15]], align 4
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP16]], align 4
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr [[OUTPUT_ADDR]], ptr [[TMP17]], align 4
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP18]], align 4
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP19]], align 4
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
|
|
// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP20]], align 4
|
|
// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
|
|
// CHECK2-NEXT: store ptr [[ARRAYIDX1]], ptr [[TMP21]], align 4
|
|
// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3
|
|
// CHECK2-NEXT: store i64 [[TMP10]], ptr [[TMP22]], align 4
|
|
// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP23]], align 4
|
|
// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
|
|
// CHECK2-NEXT: store ptr [[INPUT_ADDR]], ptr [[TMP24]], align 4
|
|
// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
|
|
// CHECK2-NEXT: store ptr [[ARRAYIDX1]], ptr [[TMP25]], align 4
|
|
// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP26]], align 4
|
|
// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP27]], align 4
|
|
// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP28]], align 4
|
|
// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP29]], align 4
|
|
// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
|
|
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
|
// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
|
|
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
|
|
// CHECK2-NEXT: [[TMP36:%.*]] = zext i32 [[ADD]] to i64
|
|
// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 4, ptr [[TMP37]], align 4
|
|
// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i32 6, ptr [[TMP38]], align 4
|
|
// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr [[TMP30]], ptr [[TMP39]], align 4
|
|
// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
|
// CHECK2-NEXT: store ptr [[TMP31]], ptr [[TMP40]], align 4
|
|
// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
|
// CHECK2-NEXT: store ptr [[TMP32]], ptr [[TMP41]], align 4
|
|
// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
|
// CHECK2-NEXT: store ptr @.offload_maptypes, ptr [[TMP42]], align 4
|
|
// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP43]], align 4
|
|
// CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP44]], align 4
|
|
// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
|
// CHECK2-NEXT: store i64 [[TMP36]], ptr [[TMP45]], align 8
|
|
// CHECK2-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
|
// CHECK2-NEXT: store i64 0, ptr [[TMP46]], align 8
|
|
// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
|
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4
|
|
// CHECK2-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
|
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP48]], align 4
|
|
// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
|
// CHECK2-NEXT: store i32 0, ptr [[TMP49]], align 4
|
|
// CHECK2-NEXT: [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.region_id, ptr [[KERNEL_ARGS]])
|
|
// CHECK2-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
|
|
// CHECK2-NEXT: br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
|
// CHECK2: omp_offload.failed:
|
|
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69(i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr null) #[[ATTR2:[0-9]+]]
|
|
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
|
// CHECK2: omp_offload.cont:
|
|
// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP52]], ptr [[SIZE_CASTED4]], align 4
|
|
// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[SIZE_CASTED4]], align 4
|
|
// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP55:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP56:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP57:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP57]], i32 0
|
|
// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP59]], i32 0
|
|
// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP61:%.*]] = mul nuw i32 [[TMP60]], 4
|
|
// CHECK2-NEXT: [[TMP62:%.*]] = sext i32 [[TMP61]] to i64
|
|
// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES10]], ptr align 4 @.offload_sizes.1, i32 48, i1 false)
|
|
// CHECK2-NEXT: [[TMP63:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 [[TMP53]], ptr [[TMP63]], align 4
|
|
// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 [[TMP53]], ptr [[TMP64]], align 4
|
|
// CHECK2-NEXT: [[TMP65:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP65]], align 4
|
|
// CHECK2-NEXT: [[TMP66:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr [[TMP56]], ptr [[TMP66]], align 4
|
|
// CHECK2-NEXT: [[TMP67:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr [[ARRAYIDX5]], ptr [[TMP67]], align 4
|
|
// CHECK2-NEXT: [[TMP68:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP68]], align 4
|
|
// CHECK2-NEXT: [[TMP69:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr [[OUTPUT_ADDR]], ptr [[TMP69]], align 4
|
|
// CHECK2-NEXT: [[TMP70:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr [[ARRAYIDX5]], ptr [[TMP70]], align 4
|
|
// CHECK2-NEXT: [[TMP71:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP71]], align 4
|
|
// CHECK2-NEXT: [[TMP72:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 3
|
|
// CHECK2-NEXT: store ptr [[TMP58]], ptr [[TMP72]], align 4
|
|
// CHECK2-NEXT: [[TMP73:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 3
|
|
// CHECK2-NEXT: store ptr [[ARRAYIDX6]], ptr [[TMP73]], align 4
|
|
// CHECK2-NEXT: [[TMP74:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES10]], i32 0, i32 3
|
|
// CHECK2-NEXT: store i64 [[TMP62]], ptr [[TMP74]], align 4
|
|
// CHECK2-NEXT: [[TMP75:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 3
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP75]], align 4
|
|
// CHECK2-NEXT: [[TMP76:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 4
|
|
// CHECK2-NEXT: store ptr [[INPUT_ADDR]], ptr [[TMP76]], align 4
|
|
// CHECK2-NEXT: [[TMP77:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 4
|
|
// CHECK2-NEXT: store ptr [[ARRAYIDX6]], ptr [[TMP77]], align 4
|
|
// CHECK2-NEXT: [[TMP78:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 4
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP78]], align 4
|
|
// CHECK2-NEXT: [[TMP79:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 5
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP79]], align 4
|
|
// CHECK2-NEXT: [[TMP80:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 5
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP80]], align 4
|
|
// CHECK2-NEXT: [[TMP81:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 5
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP81]], align 4
|
|
// CHECK2-NEXT: [[TMP82:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP83:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP84:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES10]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP85:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP85]], ptr [[DOTCAPTURE_EXPR_12]], align 4
|
|
// CHECK2-NEXT: [[TMP86:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4
|
|
// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP86]], 0
|
|
// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
|
|
// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i32 [[DIV15]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB16]], ptr [[DOTCAPTURE_EXPR_13]], align 4
|
|
// CHECK2-NEXT: [[TMP87:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4
|
|
// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP87]], 1
|
|
// CHECK2-NEXT: [[TMP88:%.*]] = zext i32 [[ADD17]] to i64
|
|
// CHECK2-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 4, ptr [[TMP89]], align 4
|
|
// CHECK2-NEXT: [[TMP90:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i32 6, ptr [[TMP90]], align 4
|
|
// CHECK2-NEXT: [[TMP91:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr [[TMP82]], ptr [[TMP91]], align 4
|
|
// CHECK2-NEXT: [[TMP92:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 3
|
|
// CHECK2-NEXT: store ptr [[TMP83]], ptr [[TMP92]], align 4
|
|
// CHECK2-NEXT: [[TMP93:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 4
|
|
// CHECK2-NEXT: store ptr [[TMP84]], ptr [[TMP93]], align 4
|
|
// CHECK2-NEXT: [[TMP94:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 5
|
|
// CHECK2-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP94]], align 4
|
|
// CHECK2-NEXT: [[TMP95:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 6
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP95]], align 4
|
|
// CHECK2-NEXT: [[TMP96:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 7
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP96]], align 4
|
|
// CHECK2-NEXT: [[TMP97:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 8
|
|
// CHECK2-NEXT: store i64 [[TMP88]], ptr [[TMP97]], align 8
|
|
// CHECK2-NEXT: [[TMP98:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 9
|
|
// CHECK2-NEXT: store i64 0, ptr [[TMP98]], align 8
|
|
// CHECK2-NEXT: [[TMP99:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 10
|
|
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP99]], align 4
|
|
// CHECK2-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 11
|
|
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP100]], align 4
|
|
// CHECK2-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 12
|
|
// CHECK2-NEXT: store i32 0, ptr [[TMP101]], align 4
|
|
// CHECK2-NEXT: [[TMP102:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.region_id, ptr [[KERNEL_ARGS18]])
|
|
// CHECK2-NEXT: [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
|
|
// CHECK2-NEXT: br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]]
|
|
// CHECK2: omp_offload.failed19:
|
|
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73(i32 [[TMP53]], ptr [[TMP54]], ptr [[TMP55]], ptr null) #[[ATTR2]]
|
|
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT20]]
|
|
// CHECK2: omp_offload.cont20:
|
|
// CHECK2-NEXT: [[TMP104:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP104]], ptr [[SIZE_CASTED21]], align 4
|
|
// CHECK2-NEXT: [[TMP105:%.*]] = load i32, ptr [[SIZE_CASTED21]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds nuw [10 x i32], ptr [[A]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP106:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 [[TMP105]], ptr [[TMP106]], align 4
|
|
// CHECK2-NEXT: [[TMP107:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 [[TMP105]], ptr [[TMP107]], align 4
|
|
// CHECK2-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 0
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP108]], align 4
|
|
// CHECK2-NEXT: [[TMP109:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr [[A]], ptr [[TMP109]], align 4
|
|
// CHECK2-NEXT: [[TMP110:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr [[ARRAYIDX22]], ptr [[TMP110]], align 4
|
|
// CHECK2-NEXT: [[TMP111:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP111]], align 4
|
|
// CHECK2-NEXT: [[TMP112:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP112]], align 4
|
|
// CHECK2-NEXT: [[TMP113:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP113]], align 4
|
|
// CHECK2-NEXT: [[TMP114:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP114]], align 4
|
|
// CHECK2-NEXT: [[TMP115:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP116:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP117:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 4, ptr [[TMP117]], align 4
|
|
// CHECK2-NEXT: [[TMP118:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i32 3, ptr [[TMP118]], align 4
|
|
// CHECK2-NEXT: [[TMP119:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 4
|
|
// CHECK2-NEXT: [[TMP120:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 3
|
|
// CHECK2-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 4
|
|
// CHECK2-NEXT: [[TMP121:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 4
|
|
// CHECK2-NEXT: store ptr @.offload_sizes.3, ptr [[TMP121]], align 4
|
|
// CHECK2-NEXT: [[TMP122:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 5
|
|
// CHECK2-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP122]], align 4
|
|
// CHECK2-NEXT: [[TMP123:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 6
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP123]], align 4
|
|
// CHECK2-NEXT: [[TMP124:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 7
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP124]], align 4
|
|
// CHECK2-NEXT: [[TMP125:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 8
|
|
// CHECK2-NEXT: store i64 0, ptr [[TMP125]], align 8
|
|
// CHECK2-NEXT: [[TMP126:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 9
|
|
// CHECK2-NEXT: store i64 0, ptr [[TMP126]], align 8
|
|
// CHECK2-NEXT: [[TMP127:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 10
|
|
// CHECK2-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP127]], align 4
|
|
// CHECK2-NEXT: [[TMP128:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 11
|
|
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4
|
|
// CHECK2-NEXT: [[TMP129:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 12
|
|
// CHECK2-NEXT: store i32 0, ptr [[TMP129]], align 4
|
|
// CHECK2-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.region_id, ptr [[KERNEL_ARGS26]])
|
|
// CHECK2-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0
|
|
// CHECK2-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
|
|
// CHECK2: omp_offload.failed27:
|
|
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78(i32 [[TMP105]], ptr [[A]], ptr null) #[[ATTR2]]
|
|
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT28]]
|
|
// CHECK2: omp_offload.cont28:
|
|
// CHECK2-NEXT: [[TMP132:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP132]], ptr [[SIZE_CASTED29]], align 4
|
|
// CHECK2-NEXT: [[TMP133:%.*]] = load i32, ptr [[SIZE_CASTED29]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [10 x i32], ptr [[A]], i32 0, i32 3
|
|
// CHECK2-NEXT: [[TMP134:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 [[TMP133]], ptr [[TMP134]], align 4
|
|
// CHECK2-NEXT: [[TMP135:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 [[TMP133]], ptr [[TMP135]], align 4
|
|
// CHECK2-NEXT: [[TMP136:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS33]], i32 0, i32 0
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP136]], align 4
|
|
// CHECK2-NEXT: [[TMP137:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr [[A]], ptr [[TMP137]], align 4
|
|
// CHECK2-NEXT: [[TMP138:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr [[ARRAYIDX30]], ptr [[TMP138]], align 4
|
|
// CHECK2-NEXT: [[TMP139:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS33]], i32 0, i32 1
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP139]], align 4
|
|
// CHECK2-NEXT: [[TMP140:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP140]], align 4
|
|
// CHECK2-NEXT: [[TMP141:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP141]], align 4
|
|
// CHECK2-NEXT: [[TMP142:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS33]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP142]], align 4
|
|
// CHECK2-NEXT: [[TMP143:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP144:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 0
|
|
// CHECK2-NEXT: store i32 4, ptr [[TMP145]], align 4
|
|
// CHECK2-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 1
|
|
// CHECK2-NEXT: store i32 3, ptr [[TMP146]], align 4
|
|
// CHECK2-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 2
|
|
// CHECK2-NEXT: store ptr [[TMP143]], ptr [[TMP147]], align 4
|
|
// CHECK2-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 3
|
|
// CHECK2-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 4
|
|
// CHECK2-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 4
|
|
// CHECK2-NEXT: store ptr @.offload_sizes.5, ptr [[TMP149]], align 4
|
|
// CHECK2-NEXT: [[TMP150:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 5
|
|
// CHECK2-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP150]], align 4
|
|
// CHECK2-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 6
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP151]], align 4
|
|
// CHECK2-NEXT: [[TMP152:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 7
|
|
// CHECK2-NEXT: store ptr null, ptr [[TMP152]], align 4
|
|
// CHECK2-NEXT: [[TMP153:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 8
|
|
// CHECK2-NEXT: store i64 0, ptr [[TMP153]], align 8
|
|
// CHECK2-NEXT: [[TMP154:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 9
|
|
// CHECK2-NEXT: store i64 0, ptr [[TMP154]], align 8
|
|
// CHECK2-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 10
|
|
// CHECK2-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP155]], align 4
|
|
// CHECK2-NEXT: [[TMP156:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 11
|
|
// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP156]], align 4
|
|
// CHECK2-NEXT: [[TMP157:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 12
|
|
// CHECK2-NEXT: store i32 0, ptr [[TMP157]], align 4
|
|
// CHECK2-NEXT: [[TMP158:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.region_id, ptr [[KERNEL_ARGS34]])
|
|
// CHECK2-NEXT: [[TMP159:%.*]] = icmp ne i32 [[TMP158]], 0
|
|
// CHECK2-NEXT: br i1 [[TMP159]], label [[OMP_OFFLOAD_FAILED35:%.*]], label [[OMP_OFFLOAD_CONT36:%.*]]
|
|
// CHECK2: omp_offload.failed35:
|
|
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81(i32 [[TMP133]], ptr [[A]], ptr null) #[[ATTR2]]
|
|
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT36]]
|
|
// CHECK2: omp_offload.cont36:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69
|
|
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined, i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]])
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined
|
|
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[OUTPUT1:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
|
|
// CHECK2-NEXT: store i32 0, ptr [[OUTPUT1]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoaddr ptr [[TMP1]] to i32
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i32
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i32 [[TMP4]], 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[OUTPUT1]], i32 [[TMP5]]
|
|
// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP]], align 4
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0
|
|
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
|
// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
|
|
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
|
// CHECK2: omp.precond.then:
|
|
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
|
|
// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]]
|
|
// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
// CHECK2: cond.true:
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END:%.*]]
|
|
// CHECK2: cond.false:
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END]]
|
|
// CHECK2: cond.end:
|
|
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
|
|
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK2: omp.inner.for.cond:
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]]
|
|
// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK2: omp.inner.for.body:
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP22]], ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 4
|
|
// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp_outlined, i32 [[TMP20]], i32 [[TMP21]], i32 [[TMP23]], ptr [[TMP24]], ptr [[TMP25]])
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK2: omp.inner.for.inc:
|
|
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
|
|
// CHECK2: omp.inner.for.end:
|
|
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
|
// CHECK2: omp.loop.exit:
|
|
// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]])
|
|
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
|
|
// CHECK2: omp.precond.end:
|
|
// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: store ptr [[OUTPUT1]], ptr [[TMP30]], align 4
|
|
// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
|
|
// CHECK2-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP32]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP33]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[OUTPUT1]], align 4
|
|
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]]
|
|
// CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP32]], ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[OUTPUT1]], align 4
|
|
// CHECK2-NEXT: [[TMP37:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP36]] monotonic, align 4
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp_outlined
|
|
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT3:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
|
|
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
|
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
|
// CHECK2: omp.precond.then:
|
|
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
|
|
// CHECK2-NEXT: store i32 0, ptr [[OUTPUT3]], align 4
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = ptrtoaddr ptr [[TMP7]] to i32
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i32
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = sub i32 [[TMP8]], [[TMP9]]
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = sdiv exact i32 [[TMP10]], 4
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[OUTPUT3]], i32 [[TMP11]]
|
|
// CHECK2-NEXT: store ptr [[TMP12]], ptr [[_TMP4]], align 4
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]]
|
|
// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
// CHECK2: cond.true:
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END:%.*]]
|
|
// CHECK2: cond.false:
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END]]
|
|
// CHECK2: cond.end:
|
|
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
|
|
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK2: omp.inner.for.cond:
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]]
|
|
// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK2: omp.inner.for.body:
|
|
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[I5]], align 4
|
|
// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]]
|
|
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4
|
|
// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0
|
|
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
|
|
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP25]]
|
|
// CHECK2-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX9]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// CHECK2: omp.body.continue:
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK2: omp.inner.for.inc:
|
|
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP28]], 1
|
|
// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
|
|
// CHECK2: omp.inner.for.end:
|
|
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
|
// CHECK2: omp.loop.exit:
|
|
// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
|
|
// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: store ptr [[OUTPUT3]], ptr [[TMP31]], align 4
|
|
// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
|
|
// CHECK2-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP33]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP34]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[OUTPUT3]], align 4
|
|
// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
|
|
// CHECK2-NEXT: store i32 [[ADD12]], ptr [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[OUTPUT3]], align 4
|
|
// CHECK2-NEXT: [[TMP38:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP37]] monotonic, align 4
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
|
|
// CHECK2: omp.precond.end:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp_outlined.omp.reduction.reduction_func
|
|
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp.reduction.reduction_func
|
|
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73
|
|
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined, i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]])
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined
|
|
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[OUTPUT2:%.*]] = alloca [3 x i32], align 4
|
|
// CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 0
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 2
|
|
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT2]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
|
// CHECK2: omp.arrayinit.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
|
// CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
|
// CHECK2: omp.arrayinit.done:
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = ptrtoaddr ptr [[TMP3]] to i32
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i32
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = sdiv exact i32 [[TMP6]], 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[OUTPUT2]], i32 [[TMP7]]
|
|
// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0
|
|
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
|
// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
|
|
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
|
// CHECK2: omp.precond.then:
|
|
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
|
|
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]]
|
|
// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
// CHECK2: cond.true:
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END:%.*]]
|
|
// CHECK2: cond.false:
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END]]
|
|
// CHECK2: cond.end:
|
|
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
|
|
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK2: omp.inner.for.cond:
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]]
|
|
// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK2: omp.inner.for.body:
|
|
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP24]], ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP]], align 4
|
|
// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp_outlined, i32 [[TMP22]], i32 [[TMP23]], i32 [[TMP25]], ptr [[TMP26]], ptr [[TMP27]])
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK2: omp.inner.for.inc:
|
|
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
|
|
// CHECK2: omp.inner.for.end:
|
|
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
|
// CHECK2: omp.loop.exit:
|
|
// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]])
|
|
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
|
|
// CHECK2: omp.precond.end:
|
|
// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: store ptr [[OUTPUT2]], ptr [[TMP32]], align 4
|
|
// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
|
|
// CHECK2-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP34]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP36]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4
|
|
// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
|
|
// CHECK2-NEXT: store i32 [[ADD10]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP36]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done13:
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP39]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]]
|
|
// CHECK2: omp.arraycpy.body15:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi ptr [ [[OUTPUT2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY15]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY15]] ]
|
|
// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], align 4
|
|
// CHECK2-NEXT: [[TMP41:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 [[TMP40]] monotonic, align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP39]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY15]]
|
|
// CHECK2: omp.arraycpy.done21:
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp_outlined
|
|
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[OUTPUT4:%.*]] = alloca [3 x i32], align 4
|
|
// CHECK2-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
|
|
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
|
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
|
|
// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
|
// CHECK2: omp.precond.then:
|
|
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
|
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 2
|
|
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT4]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP8]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
|
// CHECK2: omp.arrayinit.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_PRECOND_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
|
// CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
|
// CHECK2: omp.arrayinit.done:
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = ptrtoaddr ptr [[TMP9]] to i32
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i32
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = sub i32 [[TMP10]], [[TMP11]]
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = sdiv exact i32 [[TMP12]], 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[OUTPUT4]], i32 [[TMP13]]
|
|
// CHECK2-NEXT: store ptr [[TMP14]], ptr [[_TMP5]], align 4
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
|
|
// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
|
// CHECK2: cond.true:
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END:%.*]]
|
|
// CHECK2: cond.false:
|
|
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: br label [[COND_END]]
|
|
// CHECK2: cond.end:
|
|
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ]
|
|
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
|
// CHECK2: omp.inner.for.cond:
|
|
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
|
// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]]
|
|
// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
|
// CHECK2: omp.inner.for.body:
|
|
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
|
|
// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[I6]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 [[TMP26]]
|
|
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
|
|
// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP5]], align 4
|
|
// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 0
|
|
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
|
|
// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], [[TMP27]]
|
|
// CHECK2-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX10]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
|
// CHECK2: omp.body.continue:
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
|
// CHECK2: omp.inner.for.inc:
|
|
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1
|
|
// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4
|
|
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
|
|
// CHECK2: omp.inner.for.end:
|
|
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
|
// CHECK2: omp.loop.exit:
|
|
// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]])
|
|
// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: store ptr [[OUTPUT4]], ptr [[TMP33]], align 4
|
|
// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
|
|
// CHECK2-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP35]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP37]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4
|
|
// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP38]], [[TMP39]]
|
|
// CHECK2-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP37]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done17:
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP40]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]]
|
|
// CHECK2: omp.arraycpy.body19:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[OUTPUT4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY19]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY19]] ]
|
|
// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 4
|
|
// CHECK2-NEXT: [[TMP42:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 [[TMP41]] monotonic, align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP40]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY19]]
|
|
// CHECK2: omp.arraycpy.done25:
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
|
|
// CHECK2: omp.precond.end:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp_outlined.omp.reduction.reduction_func
|
|
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done2:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp.reduction.reduction_func
|
|
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 3
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done2:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78
|
|
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META13:![0-9]+]], !align [[META14:![0-9]+]]
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.omp_outlined, i32 [[TMP2]], ptr [[TMP0]])
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.omp_outlined
|
|
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[A2:%.*]] = alloca [2 x i32], align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META13]], !align [[META14]]
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [10 x i32], ptr [[TMP0]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [10 x i32], ptr [[TMP0]], i32 0, i32 1
|
|
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[A2]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 2
|
|
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
|
// CHECK2: omp.arrayinit.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
|
// CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
|
// CHECK2: omp.arrayinit.done:
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoaddr ptr [[TMP0]] to i32
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i32
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i32 [[TMP4]], 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A2]], i32 [[TMP5]]
|
|
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
|
|
// CHECK2-NEXT: br label [[FOR_COND:%.*]]
|
|
// CHECK2: for.cond:
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
|
// CHECK2: for.body:
|
|
// CHECK2-NEXT: br label [[FOR_INC:%.*]]
|
|
// CHECK2: for.inc:
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
|
|
// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
|
|
// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
|
|
// CHECK2: for.end:
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: store ptr [[A2]], ptr [[TMP10]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP12]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP14]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[A2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP14]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done6:
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP12]], ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP17]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
|
|
// CHECK2: omp.arraycpy.body8:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[A2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
|
|
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
|
|
// CHECK2-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP18]] monotonic, align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP17]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
|
|
// CHECK2: omp.arraycpy.done14:
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.omp_outlined.omp.reduction.reduction_func
|
|
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 2
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
|
// CHECK2: omp.arraycpy.body:
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
|
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
|
|
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
|
// CHECK2: omp.arraycpy.done2:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81
|
|
// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META13]], !align [[META14]]
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
|
|
// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.omp_outlined, i32 [[TMP2]], ptr [[TMP0]])
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.omp_outlined
|
|
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META13]], !align [[META14]]
|
|
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 3
|
|
// CHECK2-NEXT: store i32 0, ptr [[A1]], align 4
|
|
// CHECK2-NEXT: [[TMP1:%.*]] = ptrtoaddr ptr [[TMP0]] to i32
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i32
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = sub i32 [[TMP1]], [[TMP2]]
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = sdiv exact i32 [[TMP3]], 4
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A1]], i32 [[TMP4]]
|
|
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
|
|
// CHECK2-NEXT: br label [[FOR_COND:%.*]]
|
|
// CHECK2: for.cond:
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
|
|
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]]
|
|
// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
|
// CHECK2: for.body:
|
|
// CHECK2-NEXT: br label [[FOR_INC:%.*]]
|
|
// CHECK2: for.inc:
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4
|
|
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
|
|
// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
|
|
// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
|
|
// CHECK2: for.end:
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
|
|
// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP9]], align 4
|
|
// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
|
|
// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP11]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
|
// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
|
// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
|
// CHECK2-NEXT: ]
|
|
// CHECK2: .omp.reduction.case1:
|
|
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[A1]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
|
|
// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP11]], ptr @.gomp_critical_user_.reduction.var)
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.case2:
|
|
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4
|
|
// CHECK2-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP15]] monotonic, align 4
|
|
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
|
// CHECK2: .omp.reduction.default:
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.omp_outlined.omp.reduction.reduction_func
|
|
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
|
|
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
|
|
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
|
|
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
|
|
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
|
|
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
|
|
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
|
|
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
|
|
// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
|
|
// CHECK2-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK2-LABEL: define {{[^@]+}}@main
|
|
// CHECK2-SAME: () #[[ATTR6:[0-9]+]] {
|
|
// CHECK2-NEXT: entry:
|
|
// CHECK2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: [[ARRAY:%.*]] = alloca ptr, align 4
|
|
// CHECK2-NEXT: [[RESULT:%.*]] = alloca i32, align 4
|
|
// CHECK2-NEXT: store i32 0, ptr [[RETVAL]], align 4
|
|
// CHECK2-NEXT: store i32 100, ptr [[SIZE]], align 4
|
|
// CHECK2-NEXT: [[CALL:%.*]] = call noalias noundef nonnull ptr @_Znaj(i32 noundef 400) #[[ATTR8:[0-9]+]]
|
|
// CHECK2-NEXT: store ptr [[CALL]], ptr [[ARRAY]], align 4
|
|
// CHECK2-NEXT: store i32 0, ptr [[RESULT]], align 4
|
|
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAY]], align 4
|
|
// CHECK2-NEXT: call void @_Z3sumPiiS_(ptr noundef [[TMP0]], i32 noundef 100, ptr noundef [[RESULT]])
|
|
// CHECK2-NEXT: ret i32 0
|
|
//
|