
This feature is currently not supported in the compiler. To facilitate this, we emit a stub version of each kernel function body with a different name-mangling scheme and replace the respective kernel call sites appropriately. Fixes https://github.com/llvm/llvm-project/issues/60313. D120566 was an earlier attempt to upstream a solution for this issue. --------- Co-authored-by: anikelal <anikelal@amd.com>
46 lines
2.3 KiB
Common Lisp
46 lines
2.3 KiB
Common Lisp
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 4
|
|
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s
|
|
|
|
// Non-kernel helper called from kernel_function below.
// Returns true when the value produced by __nvvm_reflect for the
// "__CUDA_ARCH" key is at least 700, false otherwise.
// The autogenerated assertions in this file expect the builtin to lower to a
// call to @llvm.nvvm.reflect followed by an unsigned compare (icmp uge)
// against 700, so keep this as a single return expression.
bool device_function() {
|
|
return __nvvm_reflect("__CUDA_ARCH") >= 700;
|
|
}
|
|
|
|
// OpenCL kernel under test: stores the result of device_function(), widened
// to int, through the __global pointer i.
// Per the autogenerated assertions in this file, the emitted @kernel_function
// only forwards its argument to @__clang_ocl_kern_imp_kernel_function, and
// that stub holds the body written here (the call, the zext, and the store).
__kernel void kernel_function(__global int *i) {
|
|
*i = device_function();
|
|
}
|
|
// CHECK-LABEL: define dso_local zeroext i1 @device_function(
|
|
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.reflect(ptr addrspacecast (ptr addrspace(4) @.str to ptr))
|
|
// CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[TMP0]], 700
|
|
// CHECK-NEXT: ret i1 [[CMP]]
|
|
//
|
|
//
|
|
// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
|
|
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
|
|
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4
|
|
// CHECK-NEXT: call void @__clang_ocl_kern_imp_kernel_function(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR3:[0-9]+]]
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define dso_local ptx_kernel void @__clang_ocl_kern_imp_kernel_function(
|
|
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
|
|
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
|
|
// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @device_function() #[[ATTR3]]
|
|
// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4
|
|
// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP0]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//.
|
|
// CHECK: [[META3]] = !{i32 1}
|
|
// CHECK: [[META4]] = !{!"none"}
|
|
// CHECK: [[META5]] = !{!"int*"}
|
|
// CHECK: [[META6]] = !{!""}
|
|
//.
|