Nikita Popov c23b4fbdbb
[IR] Remove size argument from lifetime intrinsics (#150248)
Now that #149310 has restricted lifetime intrinsics to only work on
allocas, we can also drop the explicit size argument. Instead, the size
is implied by the alloca.

This removes the ability to only mark a prefix of an alloca alive/dead.
We never used that capability, so we should remove the need to handle
that possibility everywhere (though many key places, including stack
coloring, did not actually respect this).
2025-08-08 11:09:34 +02:00

3378 lines
237 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED1
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED2
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED1
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX-DISABLED2
;; void unknown(void);
;; [[omp::assume("ompx_spmd_amenable")]] void spmd_amenable(void);
;;
;; void sequential_loop() {
;; #pragma omp target teams
;; {
;; for (int i = 0; i < 100; ++i) {
;; #pragma omp parallel
;; {
;; unknown();
;; }
;; }
;; spmd_amenable();
;; }
;; }
;;
;; [[omp::assume("ompx_spmd_amenable")]] void use(__attribute__((noescape)) int *);
;;
;; void sequential_loop_to_stack_var() {
;; #pragma omp target teams
;; {
;; int x;
;; use(&x);
;; for (int i = 0; i < 100; ++i) {
;; #pragma omp parallel
;; {
;; unknown();
;; }
;; }
;; spmd_amenable();
;; }
;; }
;;
;; void sequential_loop_to_shared_var() {
;; #pragma omp target teams
;; {
;; int x;
;; for (int i = 0; i < 100; ++i) {
;; #pragma omp parallel
;; {
;; x++;
;; unknown();
;; }
;; }
;; spmd_amenable();
;; }
;; }
;;
;; void sequential_loop_to_shared_var_guarded() {
;; #pragma omp target teams
;; {
;; int x = 42;
;; for (int i = 0; i < 100; ++i) {
;; #pragma omp parallel
;; {
;; x++;
;; unknown();
;; }
;; }
;; spmd_amenable();
;; }
;; }
;;
;; void do_not_spmdize_target() {
;; #pragma omp target teams
;; {
;; // Incompatible parallel level, called both
;; // from parallel and target regions
;; unknown();
;; }
;; }
;;
;; void do_not_spmdize_task() {
;; #pragma omp target
;; {
;; #pragma omp task
;; spmd_amenable();
;; #pragma omp parallel
;; unknown();
;; }
;; }
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
%struct.kmp_task_t = type { ptr, ptr, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t }
%union.kmp_cmplrdata_t = type { ptr }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
; Function Attrs: alwaysinline convergent norecurse nounwind
;.
; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
; AMDGPU: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU: @x_shared.1 = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
; NVPTX: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
; AMDGPU-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU-DISABLED1: @x_shared.1 = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU-DISABLED1: @__omp_outlined__1_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED1: @__omp_outlined__3_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED1: @__omp_outlined__5_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED1: @__omp_outlined__7_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED1: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
; AMDGPU-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU-DISABLED2: @x_shared.1 = internal addrspace(3) global [4 x i8] poison, align 4
;.
; NVPTX-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; NVPTX-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX-DISABLED1: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX-DISABLED1: @__omp_outlined__1_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED1: @__omp_outlined__3_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED1: @__omp_outlined__5_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED1: @__omp_outlined__7_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED1: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
; NVPTX-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; NVPTX-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX-DISABLED2: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4
;.
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] {
; AMDGPU-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; NVPTX-SAME: () #[[ATTR0:[0-9]+]] {
; NVPTX-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; AMDGPU-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
; AMDGPU-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; AMDGPU-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
; AMDGPU-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; NVPTX-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
; NVPTX-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; NVPTX-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
; NVPTX-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED2-NEXT: ret void
;
call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
ret void
}
define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED1: is_worker_check:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.begin:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.finished:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU-DISABLED1: thread.user_code.check:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED1: common.ret:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED2: common.ret:
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED1: is_worker_check:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: worker_state_machine.is_active.check:
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED1: worker_state_machine.done.barrier:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED1: thread.user_code.check:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED1: common.ret:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED2: common.ret:
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
%.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
%.threadid_temp. = alloca ptr, align 8, addrspace(5)
%.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 0, ptr %.zero.addr.cast, align 4
store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
call void @__omp_outlined__(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
br label %common.ret
}
define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU: for.cond.cleanup:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX: for.cond.cleanup:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED1: for.cond:
; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED1: for.cond.cleanup:
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED2: for.cond:
; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED2: for.cond.cleanup:
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1: for.cond:
; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED1: for.cond.cleanup:
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2: for.cond:
; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED2: for.cond.cleanup:
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
%captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
br label %for.cond
for.cond: ; preds = %for.body, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp slt i32 %i.0, 100
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
call void @spmd_amenable() #6
ret void
for.body: ; preds = %for.cond
%0 = load i32, ptr %.global_tid., align 4, !tbaa !12
call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs.cast, i64 0)
%inc = add nsw i32 %i.0, 1
br label %for.cond, !llvm.loop !16
}
define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
call void @unknown() #7
ret void
}
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
%.addr1 = alloca ptr, align 8, addrspace(5)
%.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
%.zero.addr = alloca ptr, align 8, addrspace(5)
%.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
%global_args = alloca ptr, align 8, addrspace(5)
%global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
store i32 0, ptr %.zero.addr.cast, align 4
call void @__kmpc_get_shared_variables(ptr %global_args.cast)
call void @__omp_outlined__1(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED1: is_worker_check:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.begin:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.finished:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU-DISABLED1: thread.user_code.check:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED1: common.ret:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED2: common.ret:
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED1: is_worker_check:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: worker_state_machine.is_active.check:
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED1: worker_state_machine.done.barrier:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED1: thread.user_code.check:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED1: common.ret:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED2: common.ret:
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
%.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
%.threadid_temp. = alloca ptr, align 8, addrspace(5)
%.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 0, ptr %.zero.addr.cast, align 4
store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
call void @__omp_outlined__2(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
br label %common.ret
}
define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
; AMDGPU-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU: for.cond.cleanup:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX: for.cond.cleanup:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
; AMDGPU-DISABLED1-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED1: for.cond:
; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED1: for.cond.cleanup:
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
; AMDGPU-DISABLED2-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED2: for.cond:
; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED2: for.cond.cleanup:
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1: for.cond:
; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED1: for.cond.cleanup:
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2: for.cond:
; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED2: for.cond.cleanup:
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
%captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
call void @use(ptr captures(none) %x) #6
br label %for.cond
for.cond: ; preds = %for.body, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp slt i32 %i.0, 100
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
call void @spmd_amenable() #6
call void @__kmpc_free_shared(ptr %x, i64 4)
ret void
for.body: ; preds = %for.cond
%0 = load i32, ptr %.global_tid., align 4, !tbaa !12
call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs.cast, i64 0)
%inc = add nsw i32 %i.0, 1
br label %for.cond, !llvm.loop !19
}
define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
call void @unknown() #7
ret void
}
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
%.addr1 = alloca ptr, align 8, addrspace(5)
%.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
%.zero.addr = alloca ptr, align 8, addrspace(5)
%.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
%global_args = alloca ptr, align 8, addrspace(5)
%global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
store i32 0, ptr %.zero.addr.cast, align 4
call void @__kmpc_get_shared_variables(ptr %global_args.cast)
call void @__omp_outlined__3(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED1: is_worker_check:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.begin:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.finished:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU-DISABLED1: thread.user_code.check:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED1: common.ret:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED2: common.ret:
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED1: is_worker_check:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: worker_state_machine.is_active.check:
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED1: worker_state_machine.done.barrier:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED1: thread.user_code.check:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED1: common.ret:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED2: common.ret:
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
%.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
%.threadid_temp. = alloca ptr, align 8, addrspace(5)
%.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 0, ptr %.zero.addr.cast, align 4
store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
call void @__omp_outlined__4(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
br label %common.ret
}
define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU: for.cond.cleanup:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX: for.cond.cleanup:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED1: for.cond:
; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED1: for.cond.cleanup:
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED2: for.cond:
; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED2: for.cond.cleanup:
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1: for.cond:
; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED1: for.cond.cleanup:
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2: for.cond:
; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED2: for.cond.cleanup:
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
%captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
br label %for.cond
for.cond: ; preds = %for.body, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp slt i32 %i.0, 100
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
call void @spmd_amenable() #6
call void @__kmpc_free_shared(ptr %x, i64 4)
ret void
for.body: ; preds = %for.cond
store ptr %x, ptr %captured_vars_addrs.cast, align 8, !tbaa !20
%0 = load i32, ptr %.global_tid., align 4, !tbaa !12
call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs.cast, i64 1)
%inc = add nsw i32 %i.0, 1
br label %for.cond, !llvm.loop !22
}
define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
%0 = load i32, ptr %x, align 4, !tbaa !12
%inc = add nsw i32 %0, 1
store i32 %inc, ptr %x, align 4, !tbaa !12
call void @unknown() #7
ret void
}
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
%.addr1 = alloca ptr, align 8, addrspace(5)
%.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
%.zero.addr = alloca ptr, align 8, addrspace(5)
%.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
%global_args = alloca ptr, align 8, addrspace(5)
%global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
store i32 0, ptr %.zero.addr.cast, align 4
call void @__kmpc_get_shared_variables(ptr %global_args.cast)
%2 = load ptr, ptr %global_args.cast, align 8
%3 = load ptr, ptr %2, align 8, !tbaa !20
call void @__omp_outlined__5(ptr %.addr1.cast, ptr %.zero.addr.cast, ptr %3) #3
ret void
}
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED1: is_worker_check:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.begin:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.finished:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU-DISABLED1: thread.user_code.check:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED1: common.ret:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED2: common.ret:
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED1: is_worker_check:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: worker_state_machine.is_active.check:
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED1: worker_state_machine.done.barrier:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED1: thread.user_code.check:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED1: common.ret:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED2: common.ret:
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
%.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
%.threadid_temp. = alloca ptr, align 8, addrspace(5)
%.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 0, ptr %.zero.addr.cast, align 4
store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
call void @__omp_outlined__6(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
br label %common.ret
}
define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: br label [[REGION_CHECK_TID:%.*]]
; AMDGPU: region.check.tid:
; AMDGPU-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; AMDGPU: region.guarded:
; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]]
; AMDGPU: region.guarded.end:
; AMDGPU-NEXT: br label [[REGION_BARRIER]]
; AMDGPU: region.barrier:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]]
; AMDGPU: region.exit:
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU: for.cond.cleanup:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: br label [[REGION_CHECK_TID:%.*]]
; NVPTX: region.check.tid:
; NVPTX-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; NVPTX: region.guarded:
; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]]
; NVPTX: region.guarded.end:
; NVPTX-NEXT: br label [[REGION_BARRIER]]
; NVPTX: region.barrier:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
; NVPTX-NEXT: br label [[REGION_EXIT:%.*]]
; NVPTX: region.exit:
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX: for.cond.cleanup:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED1: for.cond:
; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED1: for.cond.cleanup:
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED2: for.cond:
; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED2: for.cond.cleanup:
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1: for.cond:
; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED1: for.cond.cleanup:
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2: for.cond:
; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED2: for.cond.cleanup:
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
%captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
store i32 42, ptr %x, align 4, !tbaa !12
br label %for.cond
for.cond: ; preds = %for.body, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp slt i32 %i.0, 100
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
call void @spmd_amenable() #6
call void @__kmpc_free_shared(ptr %x, i64 4)
ret void
for.body: ; preds = %for.cond
store ptr %x, ptr %captured_vars_addrs.cast, align 8, !tbaa !20
%0 = load i32, ptr %.global_tid., align 4, !tbaa !12
call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs.cast, i64 1)
%inc = add nsw i32 %i.0, 1
br label %for.cond, !llvm.loop !23
}
define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
%0 = load i32, ptr %x, align 4, !tbaa !12
%inc = add nsw i32 %0, 1
store i32 %inc, ptr %x, align 4, !tbaa !12
call void @unknowni32p(ptr %x) #7
ret void
}
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
%.addr1 = alloca ptr, align 8, addrspace(5)
%.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
%.zero.addr = alloca ptr, align 8, addrspace(5)
%.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
%global_args = alloca ptr, align 8, addrspace(5)
%global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
store i32 0, ptr %.zero.addr.cast, align 4
call void @__kmpc_get_shared_variables(ptr %global_args.cast)
%2 = load ptr, ptr %global_args.cast, align 8
%3 = load ptr, ptr %2, align 8, !tbaa !20
call void @__omp_outlined__7(ptr %.addr1.cast, ptr %.zero.addr.cast, ptr %3) #3
ret void
}
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU: worker_state_machine.finished:
; AMDGPU-NEXT: ret void
; AMDGPU: worker_state_machine.is_active.check:
; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU: worker_state_machine.parallel_region.end:
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU: worker_state_machine.done.barrier:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU: thread.user_code.check:
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
; NVPTX-NEXT: ret void
; NVPTX: worker_state_machine.is_active.check:
; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX: worker_state_machine.parallel_region.fallback.execute:
; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX: worker_state_machine.parallel_region.end:
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX: worker_state_machine.done.barrier:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX: thread.user_code.check:
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED1: is_worker_check:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.begin:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.finished:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU-DISABLED1: thread.user_code.check:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED1: common.ret:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED2: common.ret:
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED1: is_worker_check:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: worker_state_machine.is_active.check:
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED1: worker_state_machine.done.barrier:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED1: thread.user_code.check:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED1: common.ret:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED2: common.ret:
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
%.zero.addr = alloca ptr, align 8, addrspace(5)
%.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
%.threadid_temp. = alloca ptr, align 8, addrspace(5)
%.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 0, ptr %.zero.addr.cast, align 4
store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
call void @__omp_outlined__8(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
br label %common.ret
}
define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
call void @unknown() #7
ret void
}
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU: worker_state_machine.finished:
; AMDGPU-NEXT: ret void
; AMDGPU: worker_state_machine.is_active.check:
; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU: worker_state_machine.parallel_region.check:
; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute:
; AMDGPU-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU: worker_state_machine.parallel_region.end:
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU: worker_state_machine.done.barrier:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU: thread.user_code.check:
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
; NVPTX-NEXT: ret void
; NVPTX: worker_state_machine.is_active.check:
; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX: worker_state_machine.parallel_region.check:
; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute:
; NVPTX-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX: worker_state_machine.parallel_region.fallback.execute:
; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX: worker_state_machine.parallel_region.end:
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX: worker_state_machine.done.barrier:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX: thread.user_code.check:
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED1: is_worker_check:
; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.begin:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.finished:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU-DISABLED1: thread.user_code.check:
; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED1: common.ret:
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED2: common.ret:
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED1: is_worker_check:
; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: worker_state_machine.is_active.check:
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED1: worker_state_machine.done.barrier:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED1: thread.user_code.check:
; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED1: common.ret:
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED2: common.ret:
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs = alloca ptr, align 8, addrspace(5)
%captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
%2 = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %1, i32 1, i64 40, i64 0, ptr @"_omp_task_entry$")
%3 = call i32 @__kmpc_omp_task(ptr @1, i32 %1, ptr %2)
call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr %captured_vars_addrs.cast, i64 0)
call void @__kmpc_target_deinit()
br label %common.ret
}
; Function Attrs: alwaysinline convergent nounwind
define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ptr noalias %.privates., ptr noalias %.copy_fn., ptr %.task_t., ptr noalias %__context) #2 {
; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined.
; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined.
; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
; AMDGPU-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
; AMDGPU-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
; NVPTX-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
; NVPTX-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
call void @spmd_amenable() #6
ret void
}
; Function Attrs: convergent norecurse nounwind
define internal i32 @"_omp_task_entry$"(i32 %0, ptr noalias %1) #1 {
entry:
%2 = getelementptr inbounds %struct.kmp_task_t, ptr %1, i32 0, i32 2
%3 = load ptr, ptr %1, align 8, !tbaa !24
call void @.omp_outlined.(i32 %0, ptr %2, ptr null, ptr null, ptr %1, ptr %3) #3
ret i32 0
}
; Function Attrs: nounwind
declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr) #3
; Function Attrs: nounwind
declare i32 @__kmpc_omp_task(ptr, i32, ptr) #3
; Function Attrs: nosync nounwind
declare void @__kmpc_free_shared(ptr captures(none), i64) #4
; Function Attrs: nofree nosync nounwind
declare ptr @__kmpc_alloc_shared(i64) #5
; Function Attrs: convergent
declare void @use(ptr captures(none)) #6
; Function Attrs: convergent
declare void @unknown() #7
; Function Attrs: convergent
declare void @unknowni32p(ptr) #7
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(ptr captures(none)) #8
define weak i32 @__kmpc_target_init(ptr %0, ptr %1) {
; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; AMDGPU-NEXT: ret i32 0
;
; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
; NVPTX-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; NVPTX-NEXT: ret i32 0
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
; AMDGPU-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; AMDGPU-DISABLED1-NEXT: ret i32 0
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
; AMDGPU-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; AMDGPU-DISABLED2-NEXT: ret i32 0
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
; NVPTX-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; NVPTX-DISABLED1-NEXT: ret i32 0
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
; NVPTX-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; NVPTX-DISABLED2-NEXT: ret i32 0
;
ret i32 0
}
declare void @__kmpc_get_shared_variables(ptr)
; Function Attrs: alwaysinline
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #9
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(ptr captures(none)) #8
; Function Attrs: convergent
declare void @spmd_amenable() #6
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(ptr) #3
declare void @__kmpc_target_deinit()
define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
call void @unknown() #7
ret void
}
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
%.addr1 = alloca ptr, align 8, addrspace(5)
%.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
%.zero.addr = alloca ptr, align 8, addrspace(5)
%.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
%global_args = alloca ptr, align 8, addrspace(5)
%global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
store i32 0, ptr %.zero.addr.cast, align 4
call void @__kmpc_get_shared_variables(ptr %global_args.cast)
call void @__omp_outlined__9(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
attributes #0 = { alwaysinline convergent norecurse nounwind "kernel" }
attributes #1 = { convergent norecurse nounwind }
attributes #2 = { alwaysinline convergent nounwind }
attributes #3 = { nounwind }
attributes #4 = { nosync nounwind }
attributes #5 = { nofree nosync nounwind }
attributes #6 = { convergent "llvm.assume"="ompx_spmd_amenable" }
attributes #7 = { convergent }
attributes #8 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #9 = { alwaysinline }
!omp_offload.info = !{!0, !1, !2, !3, !4, !5}
!llvm.module.flags = !{!6, !7, !8, !9, !10}
!llvm.ident = !{!11}
!0 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
!1 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
!2 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
!3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
!4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
!5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
!6 = !{i32 1, !"wchar_size", i32 4}
!7 = !{i32 7, !"openmp", i32 50}
!8 = !{i32 7, !"openmp-device", i32 50}
!9 = !{i32 8, !"PIC Level", i32 2}
!10 = !{i32 7, !"frame-pointer", i32 2}
!11 = !{!"clang version 14.0.0"}
!12 = !{!13, !13, i64 0}
!13 = !{!"int", !14, i64 0}
!14 = !{!"omnipotent char", !15, i64 0}
!15 = !{!"Simple C/C++ TBAA"}
!16 = distinct !{!16, !17, !18}
!17 = !{!"llvm.loop.mustprogress"}
!18 = !{!"llvm.loop.unroll.disable"}
!19 = distinct !{!19, !17, !18}
!20 = !{!21, !21, i64 0}
!21 = !{!"any pointer", !14, i64 0}
!22 = distinct !{!22, !17, !18}
!23 = distinct !{!23, !17, !18}
!24 = !{!25, !21, i64 0}
!25 = !{!"kmp_task_t_with_privates", !26, i64 0}
!26 = !{!"kmp_task_t", !21, i64 0, !21, i64 8, !13, i64 16, !14, i64 24, !14, i64 32}
;.
; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; AMDGPU: attributes #[[ATTR1]] = { norecurse }
; AMDGPU: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; AMDGPU: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; AMDGPU: attributes #[[ATTR4]] = { nounwind }
; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; AMDGPU: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU: attributes #[[ATTR8]] = { convergent }
; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; NVPTX: attributes #[[ATTR1]] = { norecurse }
; NVPTX: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; NVPTX: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; NVPTX: attributes #[[ATTR4]] = { nounwind }
; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; NVPTX: attributes #[[ATTR8]] = { convergent }
; NVPTX: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; NVPTX: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; NVPTX: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; AMDGPU-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; AMDGPU-DISABLED1: attributes #[[ATTR1]] = { norecurse }
; AMDGPU-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; AMDGPU-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; AMDGPU-DISABLED1: attributes #[[ATTR4]] = { nounwind }
; AMDGPU-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; AMDGPU-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; AMDGPU-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU-DISABLED1: attributes #[[ATTR8]] = { convergent }
; AMDGPU-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; AMDGPU-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; AMDGPU-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; AMDGPU-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; AMDGPU-DISABLED2: attributes #[[ATTR1]] = { norecurse }
; AMDGPU-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; AMDGPU-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; AMDGPU-DISABLED2: attributes #[[ATTR4]] = { nounwind }
; AMDGPU-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; AMDGPU-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; AMDGPU-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU-DISABLED2: attributes #[[ATTR8]] = { convergent }
; AMDGPU-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; AMDGPU-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
;.
; NVPTX-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; NVPTX-DISABLED1: attributes #[[ATTR1]] = { norecurse }
; NVPTX-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; NVPTX-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; NVPTX-DISABLED1: attributes #[[ATTR4]] = { nounwind }
; NVPTX-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; NVPTX-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; NVPTX-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; NVPTX-DISABLED1: attributes #[[ATTR8]] = { convergent }
; NVPTX-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; NVPTX-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; NVPTX-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; NVPTX-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; NVPTX-DISABLED2: attributes #[[ATTR1]] = { norecurse }
; NVPTX-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; NVPTX-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; NVPTX-DISABLED2: attributes #[[ATTR4]] = { nounwind }
; NVPTX-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; NVPTX-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; NVPTX-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; NVPTX-DISABLED2: attributes #[[ATTR8]] = { convergent }
; NVPTX-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; NVPTX-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
;.
; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"}
; AMDGPU: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"}
; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"}
; NVPTX: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"}
; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"}
; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; AMDGPU-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; AMDGPU-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; AMDGPU-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; AMDGPU-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; AMDGPU-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
; AMDGPU-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; AMDGPU-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
; AMDGPU-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; AMDGPU-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; AMDGPU-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; AMDGPU-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; AMDGPU-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; AMDGPU-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; AMDGPU-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; AMDGPU-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; AMDGPU-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
; AMDGPU-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; AMDGPU-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
; AMDGPU-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; AMDGPU-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; AMDGPU-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; AMDGPU-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; NVPTX-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; NVPTX-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; NVPTX-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; NVPTX-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
; NVPTX-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; NVPTX-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
; NVPTX-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
; NVPTX-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; NVPTX-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; NVPTX-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; NVPTX-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; NVPTX-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; NVPTX-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; NVPTX-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; NVPTX-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
; NVPTX-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; NVPTX-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
; NVPTX-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
; NVPTX-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; NVPTX-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; NVPTX-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; NVPTX-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.