
Specifying a kernel with the `ptx_kernel` or `amdgpu_kernel` calling convention is more idiomatic and more compile-time performant than using the `nvvm.annotations !"kernel"` metadata. Transition OMPIRBuilder to use calling conventions for PTX kernels and no longer emit `nvvm.annotations`. Update OpenMPOpt to work with kernels specified via calling convention as well as metadata. Update OpenMP tests to use the calling conventions.
108 lines
4.9 KiB
LLVM
108 lines
4.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s

; Aggregate types used by the module-level constants in this file. Only the
; layouts are visible here; the meaning of the individual fields is defined
; elsewhere (presumably by the OpenMP device runtime -- confirm before relying
; on any particular field).
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }

; @0 is a NUL-terminated 23-byte string and @1 is an ident_t constant whose
; last field points at it. @1 is passed to the barrier call in the
; ..._main_l11 kernel and is also referenced from the kernel environment.
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8

; Module-local double, zero-initialised. It is written by the
; ..._Device_l6_ctor kernel and read by the ..._main_l11 kernel.
@_ZL6Device = internal global double 0.000000e+00, align 8

; Kernel environment handed to __kmpc_target_init by the ..._main_l11 kernel.
@__omp_offloading_fd02_85283c04_main_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }

; Kernel entry point, marked both with the ptx_kernel calling convention and
; with the "kernel" string attribute.
;   %dyn - forwarded unchanged to __kmpc_target_init.
;   %X   - non-null, 8-byte-aligned pointer to at least 8 dereferenceable
;          bytes; receives the current value of @_ZL6Device.
; Control flow: the user code runs only when __kmpc_target_init returns -1;
; otherwise the kernel returns immediately through common.ret.
define weak ptx_kernel void @__omp_offloading_fd02_85283c04_main_l11(ptr %dyn, ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr "kernel" {
entry:
  %0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment, ptr %dyn) #0
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret:
  ret void

user_code.entry:
  %1 = load double, ptr @_ZL6Device, align 8, !tbaa !11
  %2 = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #0
  ; Only the thread whose hardware id is 0 takes the region.guarded path and
  ; performs the store; every thread then reaches region.barrier.
  %3 = icmp eq i32 %2, 0
  br i1 %3, label %region.guarded, label %region.barrier

region.guarded:
  store double %1, ptr %X, align 8, !tbaa !11
  br label %region.barrier

region.barrier:
  ; Unlike the other runtime calls in this function, this call carries no
  ; attribute group in the input.
  tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @1, i32 %2)
  tail call void @__kmpc_target_deinit() #0
  br label %common.ret
}

; Runtime entry points called at the start and end of the ..._main_l11 kernel.
; They are only declared in this module, not defined.
declare i32 @__kmpc_target_init(ptr, ptr) local_unnamed_addr

declare void @__kmpc_target_deinit() local_unnamed_addr

; Kernel (ptx_kernel calling convention plus the "kernel" string attribute)
; that writes @_ZL6Device. The stored value is the quotient of two separate
; __nv_log(2.0) calls; both calls use attribute group #1.
define weak ptx_kernel void @__omp_offloading__fd02_85283c04_Device_l6_ctor() "kernel" {
entry:
  %call.i = tail call double @__nv_log(double noundef 2.000000e+00) #1
  %call.i2 = tail call double @__nv_log(double noundef 2.000000e+00) #1
  %div = fdiv double %call.i, %call.i2
  store double %div, ptr @_ZL6Device, align 8, !tbaa !11
  ret void
}

; External functions that are only declared in this module.
declare double @__nv_log(double)

declare i32 @__kmpc_get_hardware_thread_id_in_block()

declare void @__kmpc_barrier_simple_spmd(ptr, i32)

; #0 is attached to the __kmpc_target_init,
; __kmpc_get_hardware_thread_id_in_block and __kmpc_target_deinit call sites;
; #1 is attached to the two __nv_log call sites.
attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }

; Named metadata. Note that ids !3 and !4 are not defined anywhere in this
; file: the numbering jumps from !2 to !5.
!omp_offload.info = !{!0, !1, !2}
!llvm.module.flags = !{!5, !6, !7, !8, !9}
!llvm.ident = !{!10}

; Operands of !omp_offload.info.
!0 = !{i32 0, i32 64770, i32 -2060960764, !"__omp_offloading__fd02_85283c04_Device_l6_ctor", i32 6, i32 1}
!1 = !{i32 0, i32 64770, i32 -2060960764, !"main", i32 11, i32 2}
!2 = !{i32 1, !"_ZL6Device", i32 0, i32 0}
; Operands of !llvm.module.flags.
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{i32 7, !"openmp", i32 50}
!7 = !{i32 7, !"openmp-device", i32 50}
!8 = !{i32 7, !"PIC Level", i32 2}
!9 = !{i32 7, !"frame-pointer", i32 2}
!10 = !{!"clang version 14.0.0"}
; !11 is the !tbaa tag attached to every load and store of a double in this
; module; !12-!14 are the nodes it refers to.
!11 = !{!12, !12, i64 0}
!12 = !{!"double", !13, i64 0}
!13 = !{!"omnipotent char", !14, i64 0}
!14 = !{!"Simple C++ TBAA"}
; Expected IR after the pass. These assertions are autogenerated (see the
; first line of the test); regenerate them with the script named there rather
; than editing them by hand.
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
; CHECK-SAME: (ptr [[DYN:%.*]], ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment, ptr [[DYN]]) #[[ATTR1:[0-9]+]]
; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK:       common.ret:
; CHECK-NEXT:    ret void
; CHECK:       user_code.entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA9:![0-9]+]]
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK:       region.guarded:
; CHECK-NEXT:    store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA9]]
; CHECK-NEXT:    br label [[REGION_BARRIER]]
; CHECK:       region.barrier:
; CHECK-NEXT:    tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP2]]) #[[ATTR1]]
; CHECK-NEXT:    tail call void @__kmpc_target_deinit() #[[ATTR1]]
; CHECK-NEXT:    br label [[COMMON_RET]]
;
;
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2]]
; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
; CHECK-NEXT:    store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA9]]
; CHECK-NEXT:    ret void
;