Aniket Lal 642481a428
[Clang][OpenCL][AMDGPU] Allow a kernel to call another kernel (#115821)
This feature is currently not supported in the compiler.
To facilitate this we emit a stub version of each kernel
function body with different name mangling scheme, and
replaces the respective kernel call-sites appropriately.
    
Fixes https://github.com/llvm/llvm-project/issues/60313
    
D120566 was an earlier attempt made to upstream a solution
for this issue.

---------

Co-authored-by: anikelal <anikelal@amd.com>
2025-04-08 10:29:30 +05:30

87 lines
5.2 KiB
Common Lisp

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 4
// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
__kernel void test_printf_noargs() {
printf("");
}
__kernel void test_printf_int(int i) {
printf("%d", i);
}
__kernel void test_printf_str_int(int i) {
char s[] = "foo";
printf("%s:%d", s, i);
}
// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_noargs(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_noargs() #[[ATTR5:[0-9]+]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_noargs(
// CHECK-SAME: ) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str) #[[ATTR6:[0-9]+]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_int(
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5:![0-9]+]] !kernel_arg_access_qual [[META6:![0-9]+]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8:![0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9:![0-9]+]]
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_int(i32 noundef [[TMP0]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_int(
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.1, i32 noundef [[TMP0]]) #[[ATTR6]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_str_int(
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_str_int(i32 noundef [[TMP0]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_str_int(
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5)
// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR7:[0-9]+]]
// CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 1 [[S]], ptr addrspace(4) align 1 @__const.test_printf_str_int.s, i64 4, i1 false)
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(5) [[S]], i64 0, i64 0
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP0]]) #[[ATTR6]]
// CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR7]]
// CHECK-NEXT: ret void
//
//.
// CHECK: [[META4]] = !{}
// CHECK: [[META5]] = !{i32 0}
// CHECK: [[META6]] = !{!"none"}
// CHECK: [[META7]] = !{!"int"}
// CHECK: [[META8]] = !{!""}
// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
// CHECK: [[META10]] = !{!"int", [[META11:![0-9]+]], i64 0}
// CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0}
// CHECK: [[META12]] = !{!"Simple C/C++ TBAA"}
//.